llama.cpp
.devops
nix
apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix.github
ISSUE_TEMPLATE
010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.ymlworkflows
bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.ymlbenches
cmake
arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmakecommon
jinja
README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.hdocs
multimodal
MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.mdops
BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csvexamples
llama.android
app
src
lib
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.promodel-conversion
scripts
causal
compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.pyembedding
compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.pyutils
__init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.pysycl
CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.batggml
include
ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.hsrc
ggml-cann
CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cppggml-cpu
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.hggml-cuda
template-instances
fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu 
fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cuggml-hexagon
htp
CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.hggml-metal
CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metalggml-opencl
kernels
add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.clggml-sycl
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hppggml-virtgpu
backend
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cppggml-vulkan
vulkan-shaders
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp 
opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.compggml-webgpu
wgsl-shaders
argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgslgguf-py
gguf
scripts
gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.pygrammars
README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnfmedia
llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svgmodels
templates
Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinjarequirements
requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txtscripts
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmakesrc
models
afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpptests
peg-parser
simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.htools
cvector-generator
CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txtmtmd
legacy-models
convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.pymodels
cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cppserver
public_legacy
colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.csspublic_simplechat
datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjstests
unit
test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.pywebui
.storybook
ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.tssrc
lib
components
app
chat
ChatAttachments
ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelteChatForm
ChatFormActions
ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelteChatMessages
ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelteChatScreen
ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.sveltedialogs
DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.sveltemisc
ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelteui
alert-dialog
alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.tscard
card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.tsdialog
dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.tsdropdown-menu
dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.tspopover
index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelteselect
index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.sveltesheet
index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.sveltesidebar
constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.sveltetable
index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelteconstants
auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.tsstores
chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.tsutils
api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.tstests
llama.cpp/tools/server/themes/wild/index.html
raw
1<html>
2
3<head>
4 <meta charset="UTF-8">
5 <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
6 <meta name="color-scheme" content="light dark">
7 <title>llama.cpp - chat</title>
8
9 <style>
10 body {
11 font-family: system-ui;
12 font-size: 90%;
13 background-image: url('llamapattern.png');
14 }
15
16 #container {
17 margin: 0em auto;
18 display: flex;
19 flex-direction: column;
20 justify-content: space-between;
21 height: 100%;
22 }
23
24 main {
25 margin: 3px;
26 display: flex;
27 flex-direction: column;
28 justify-content: space-between;
29 gap: 1em;
30
31 flex-grow: 1;
32 overflow-y: auto;
33
34 border: 1px solid #ccc;
35 border-radius: 5px;
36 padding: 0.5em;
37
38 background-color: rgba(255,255,255,0.9);
39 }
40
41 body {
42 max-width: 600px;
43 min-width: 300px;
44 line-height: 1.2;
45 margin: 0 auto;
46 padding: 0 0.5em;
47 }
48
49 p {
50 overflow-wrap: break-word;
51 word-wrap: break-word;
52 hyphens: auto;
53 margin-top: 0.5em;
54 margin-bottom: 0.5em;
55 }
56
57 #write form {
58 margin: 1em 0 0 0;
59 display: flex;
60 flex-direction: column;
61 gap: 0.5em;
62 align-items: stretch;
63 }
64
65 .right {
66 display: flex;
67 flex-direction: row;
68 gap: 0.5em;
69 justify-content: flex-end;
70 }
71
72 fieldset {
73 border: none;
74 padding: 0;
75 margin: 0;
76 }
77
78 fieldset.two {
79 display: grid;
80 grid-template: "a a";
81 gap: 1em;
82 }
83
84 fieldset.three {
85 display: grid;
86 grid-template: "a a a";
87 gap: 1em;
88 }
89
90 details {
91 border: 1px solid #aaa;
92 border-radius: 4px;
93 padding: 0.5em 0.5em 0;
94 margin-top: 0.5em;
95 }
96
97 summary {
98 font-weight: bold;
99 margin: -0.5em -0.5em 0;
100 padding: 0.5em;
101 cursor: pointer;
102 }
103
104 details[open] {
105 padding: 0.5em;
106 }
107
108 .prob-set {
109 padding: 0.3em;
110 border-bottom: 1px solid #ccc;
111 }
112
113 .popover-content {
114 position: absolute;
115 background-color: white;
116 padding: 0.2em;
117 box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
118 }
119
120 textarea {
121 padding: 5px;
122 flex-grow: 1;
123 width: 100%;
124 }
125
126 pre code {
127 display: block;
128 background-color: #222;
129 color: #ddd;
130 }
131
132 code {
133 font-family: monospace;
134 padding: 0.1em 0.3em;
135 border-radius: 3px;
136 }
137
138 fieldset label {
139 margin: 0.5em 0;
140 display: block;
141 }
142
143 fieldset label.slim {
144 margin: 0 0.5em;
145 display: inline;
146 }
147
148 header,
149 footer {
150 text-align: center;
151 }
152
153 footer {
154 font-size: 80%;
155 color: #888;
156 }
157
158 .mode-chat textarea[name=prompt] {
159 height: 4.5em;
160 }
161
162 .mode-completion textarea[name=prompt] {
163 height: 10em;
164 }
165
166 [contenteditable] {
167 display: inline-block;
168 white-space: pre-wrap;
169 outline: 0px solid transparent;
170 }
171
172 @keyframes loading-bg-wipe {
173 0% {
174 background-position: 0%;
175 }
176
177 100% {
178 background-position: 100%;
179 }
180 }
181
182 .loading {
183 --loading-color-1: #eeeeee00;
184 --loading-color-2: #eeeeeeff;
185 background-size: 50% 100%;
186 background-image: linear-gradient(90deg, var(--loading-color-1), var(--loading-color-2), var(--loading-color-1));
187 animation: loading-bg-wipe 2s linear infinite;
188 }
189
190 @media (prefers-color-scheme: dark) {
191 .loading {
192 --loading-color-1: #22222200;
193 --loading-color-2: #222222ff;
194 }
195
196 .popover-content {
197 background-color: black;
198 }
199 }
200 </style>
201
202 <script type="module">
203 import {
204 html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
205 } from './index.js';
206
207 import { llama } from './completion.js';
208 import { SchemaConverter } from './json-schema-to-grammar.mjs';
// Module-level flag, initially false.
// NOTE(review): presumably tracks whether an image is attached to the next request — confirm against usage.
let selected_image = false;
// Starts at -1.
// NOTE(review): presumably the server slot id, with -1 meaning "none assigned yet" — confirm against usage.
let slot_id = -1;

// Conversation/session state: prompt text, templates, transcript and speaker names.
const session = signal({
  prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",
  template: "{{prompt}}\n\n{{history}}\n{{char}}:",
  historyTemplate: "{{name}}: {{message}}",
  transcript: [],
  type: "chat", // "chat" | "completion"
  char: "Llama",
  user: "User",
  image_selected: ''
});

// Default sampling / generation parameters.
const params = signal({
  n_predict: 400,
  temperature: 0.7,
  repeat_last_n: 256, // 0 = disable penalty, -1 = context size
  repeat_penalty: 1.18, // 1.0 = disabled
  top_k: 40, // <= 0 to use vocab size
  top_p: 0.95, // 1.0 = disabled
  min_p: 0.05, // 0 = disabled
  typical_p: 1.0, // 1.0 = disabled
  presence_penalty: 0.0, // 0.0 = disabled
  frequency_penalty: 0.0, // 0.0 = disabled
  mirostat: 0, // 0/1/2
  mirostat_tau: 5, // target entropy
  mirostat_eta: 0.1, // learning rate
  grammar: '',
  n_probs: 0, // no completion_probabilities,
  min_keep: 0, // min probs from each sampler,
  image_data: [],
  cache_prompt: true,
  api_key: ''
});
244
245 /* START: Support for storing prompt templates and parameters in browsers LocalStorage */
246
247 const local_storage_storageKey = "llamacpp_server_local_storage";
248
/**
 * Store `content` as JSON in localStorage under the namespaced key for `tag`.
 *
 * @param {string} tag - Suffix appended to the shared storage key.
 * @param {*} content - Value passed to JSON.stringify before storing.
 */
function local_storage_setDataFromObject(tag, content) {
  const storageKey = local_storage_storageKey + '/' + tag;
  const serialized = JSON.stringify(content);
  localStorage.setItem(storageKey, serialized);
}
252
/**
 * Store `content` as-is (no JSON encoding) in localStorage under the
 * namespaced key for `tag`.
 *
 * @param {string} tag - Suffix appended to the shared storage key.
 * @param {string} content - Text to store.
 */
function local_storage_setDataFromRawText(tag, content) {
  const storageKey = local_storage_storageKey + '/' + tag;
  localStorage.setItem(storageKey, content);
}
256
/**
 * Read the localStorage entry for `tag` and parse it as JSON.
 *
 * @param {string} tag - Suffix appended to the shared storage key.
 * @returns {*} The parsed value, or null when the entry is missing, empty,
 *   or does not contain valid JSON.
 */
function local_storage_getDataAsObject(tag) {
  const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
  if (!item) {
    return null;
  }
  try {
    return JSON.parse(item);
  } catch (err) {
    if (!(err instanceof SyntaxError)) {
      throw err;
    }
    // A corrupted entry must not abort module initialisation; treat it as absent.
    console.warn('Ignoring malformed LocalStorage entry for "' + tag + '"', err);
    return null;
  }
}
265
/**
 * Read the raw localStorage entry for `tag` without parsing it.
 *
 * @param {string} tag - Suffix appended to the shared storage key.
 * @returns {string|null} The stored text, or null when the entry is missing or empty.
 */
function local_storage_getDataAsRawText(tag) {
  const storedText = localStorage.getItem(local_storage_storageKey + '/' + tag);
  return storedText ? storedText : null;
}
274
// Container for user templates and settings, keyed by template name.
const savedUserTemplates = signal({});
// NOTE(review): the initial value uses a `template` key while the functions
// below read and write `data` — confirm which shape is intended.
const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } });

// Import locally saved templates and settings if there are any.
// User templates and settings are stored in one object in the form
// { "templatename": "templatedata" } and { "settingstemplatename": "settingsdata" }.

console.log('Importing saved templates');

const importedTemplates = local_storage_getDataAsObject('user_templates');

if (importedTemplates) {
  // Saved templates were successfully imported.
  console.log('Processing saved templates and updating default template');
  params.value = { ...params.value, image_data: [] };

  // Build the new collection in one assignment instead of mutating the
  // signal's value in place; the built-in defaults always override any
  // stored "default" entry.
  savedUserTemplates.value = {
    ...importedTemplates,
    default: { session: session.value, params: params.value },
  };
  local_storage_setDataFromObject('user_templates', savedUserTemplates.value);
} else {
  // No saved templates detected.
  console.log('Initializing LocalStorage and saving default template');

  savedUserTemplates.value = { default: { session: session.value, params: params.value } };
  local_storage_setDataFromObject('user_templates', savedUserTemplates.value);
}
308
/**
 * Point the selected template at the saved "default" template.
 *
 * Assigns a new object to the signal rather than mutating the current value
 * in place, so signal subscribers are notified of the change. Any other keys
 * on the current value are carried over unchanged.
 */
function userTemplateResetToDefault() {
  console.log('Resetting template to default');
  selectedUserTemplate.value = {
    ...selectedUserTemplate.value,
    name: 'default',
    data: savedUserTemplates.value['default'],
  };
}
314
/**
 * Copy a template's session and params into the live `session` / `params`
 * signals, clearing the image fields.
 *
 * Each signal is written exactly once, so subscribers never observe an
 * intermediate state that still carries the template's image fields.
 *
 * @param {{data: {session: object, params: object}}} t - Template to apply.
 */
function userTemplateApply(t) {
  const { session: templateSession, params: templateParams } = t.data;
  session.value = { ...templateSession, image_selected: '' };
  params.value = { ...templateParams, image_data: [] };
}
321
/**
 * Select the default template, then apply the resulting selection to the
 * live session and params.
 */
function userTemplateResetToDefaultAndApply() {
  userTemplateResetToDefault();
  const selection = selectedUserTemplate.value;
  userTemplateApply(selection);
}
326
/**
 * Restore the autosaved last-used template from LocalStorage and apply it.
 *
 * Falls back to the default template when no autosave exists, or when the
 * stored entry lacks the `data.session` / `data.params` objects that
 * applying a template requires.
 */
function userTemplateLoadAndApplyAutosaved() {
  // get autosaved last used template
  const lastUsedTemplate = local_storage_getDataAsObject('user_templates_last');
  const isUsable = Boolean(lastUsedTemplate?.data?.session && lastUsedTemplate?.data?.params);

  if (isUsable) {
    console.log('Autosaved template found, restoring');
    selectedUserTemplate.value = lastUsedTemplate;
  } else {
    if (lastUsedTemplate) {
      // Something was stored but it cannot be applied; do not let it break start-up.
      console.warn('Autosaved template is malformed, ignoring it');
    }
    console.log('No autosaved template found, using default template');
    // no usable autosaved template, so load from default.
    userTemplateResetToDefault();
  }

  console.log('Applying template');
  // and update internal data from templates
  userTemplateApply(selectedUserTemplate.value);
}
350
351 //console.log(savedUserTemplates.value)
352 //console.log(selectedUserTemplate.value)
353
/**
 * Save the current session and params into the autosave slot.
 *
 * The "default" template is never overwritten: when it is selected, the
 * snapshot is stored under a fresh `UserTemplate-<timestamp>` name and then
 * loaded back and applied. Otherwise the snapshot keeps the selected
 * template's name.
 */
function userTemplateAutosave() {
  console.log('Template Autosave...');

  const isDefault = selectedUserTemplate.value.name === 'default';
  const name = isDefault
    ? 'UserTemplate-' + Date.now().toString()
    : selectedUserTemplate.value.name;

  if (isDefault) {
    console.log('Saving as ' + name);
  }

  // save in the autosave slot
  local_storage_setDataFromObject('user_templates_last', {
    name,
    data: { session: session.value, params: params.value },
  });

  if (isDefault) {
    // and load it back and apply
    userTemplateLoadAndApplyAutosaved();
  }
}
372
// At startup: restore the autosaved template, or the default if none exists.
console.log('Checking for autosaved last used template');
userTemplateLoadAndApplyAutosaved();
375
376 /* END: Support for storing prompt templates and parameters in browsers LocalStorage */
377
// most recent streamed chunk that carried timings (null until one arrives)
const llamaStats = signal(null);
// AbortController of the in-flight completion, or null when idle
const controller = signal(null);

// currently generating a completion?
const generating = computed(() => {
  return controller.value != null;
});

// has the user started a chat?
const chatStarted = computed(() => {
  return session.value.transcript.length > 0;
});
386
/**
 * Replace the session's transcript, publishing a new session object so that
 * signal subscribers are notified.
 *
 * @param {Array} transcript - the full new transcript
 */
const transcriptUpdate = (transcript) => {
  const previous = session.value;
  session.value = { ...previous, transcript };
};
393
394 // simple template replace
/**
 * Expand `{{key}}` placeholders in `str` using the session values, optionally
 * overlaid with `extraSettings`.
 *
 * Replacement values are themselves expanded recursively, but the recursive
 * call does not receive `extraSettings`.
 *
 * @param {*} str - text to expand (coerced with String())
 * @param {object} [extraSettings] - additional keys that override the session
 * @returns {string} the expanded text
 */
const template = (str, extraSettings) => {
  const settings = extraSettings
    ? { ...session.value, ...extraSettings }
    : session.value;
  const expandPlaceholder = (_match, key) => template(settings[key]);
  return String(str).replaceAll(/\{\{(.*?)\}\}/g, expandPlaceholder);
};
402
/**
 * Stream a completion from the server and mirror it into the transcript.
 *
 * While the stream is running, `controller.value` holds this run's
 * AbortController. It is released in a `finally` block so that a thrown
 * error or the early multimodal return cannot leave the UI permanently in
 * the "generating" state. The release is skipped if `controller.value` no
 * longer refers to this run's controller (e.g. it was cleared and a newer
 * run has installed its own).
 *
 * @param {string} prompt - prompt text sent to the server
 * @param {object} llamaParams - request parameters forwarded to `llama`
 * @param {string} char - speaker label recorded with the generated messages
 * @throws {Error} "already running" if a completion is already in flight;
 *   errors raised by the stream are propagated unchanged
 */
async function runLlama(prompt, llamaParams, char) {
  if (controller.value) {
    throw new Error("already running");
  }

  const currentMessages = [];
  const history = session.value.transcript;
  const ownController = new AbortController();
  controller.value = ownController;

  try {
    const stream = llama(prompt, llamaParams, {
      controller: ownController,
      api_url: location.pathname.replace(/\/+$/, ''),
    });

    for await (const chunk of stream) {
      const data = chunk.data;

      if (data.stop) {
        // drop trailing messages that end with a newline
        while (
          currentMessages.length > 0 &&
          currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
        ) {
          currentMessages.pop();
        }
        transcriptUpdate([...history, [char, currentMessages]]);
        console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
      } else {
        currentMessages.push(data);
        slot_id = data.slot_id;
        if (selected_image && !data.multimodal) {
          alert("The server was not compiled for multimodal or the model projector can't be loaded.");
          return;
        }
        transcriptUpdate([...history, [char, currentMessages]]);
      }

      if (data.timings) {
        llamaStats.value = data;
      }
    }
  } finally {
    // only release the slot if it is still ours
    if (controller.value === ownController) {
      controller.value = null;
    }
  }
}
439
440 // send message to server
/**
 * Append the user's message to the transcript, build the prompt from the
 * session templates and stream the reply. Does nothing (apart from logging)
 * when a completion is already running.
 *
 * @param {string} msg - the user's message
 */
const chat = async (msg) => {
  if (controller.value) {
    console.log('already running...');
    return;
  }

  transcriptUpdate([...session.value.transcript, ["{{user}}", msg]]);

  // render one transcript entry through the chat history template
  const renderHistoryEntry = ([name, data]) => {
    const message = Array.isArray(data)
      ? data.map(msg => msg.content).join('').replace(/^\s/, '')
      : data;
    return template(session.value.historyTemplate, { name, message });
  };

  let prompt = template(session.value.template, {
    message: msg,
    history: session.value.transcript.flatMap(renderHistoryEntry).join("\n"),
  });

  // with an image selected, a fixed prompt replaces the templated one
  if (selected_image) {
    prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
  }

  const llamaParams = {
    ...params.value,
    slot_id: slot_id,
    stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
  };
  await runLlama(prompt, llamaParams, "{{char}}");
};
473
/**
 * Run a raw completion of the session prompt. Once the run settles (success
 * or failure) the transcript text is folded back into `session.value.prompt`
 * and the transcript is emptied; both are written in place on the current
 * session object rather than by publishing a new one.
 */
const runCompletion = () => {
  if (controller.value) {
    console.log('already running...');
    return;
  }

  const { prompt } = session.value;
  transcriptUpdate([...session.value.transcript, ["", prompt]]);

  const entryText = ([_, data]) =>
    Array.isArray(data) ? data.map(msg => msg.content).join('') : data;

  const foldTranscriptIntoPrompt = () => {
    session.value.prompt = session.value.transcript.map(entryText).join('');
    session.value.transcript = [];
  };

  const llamaParams = {
    ...params.value,
    slot_id: slot_id,
    stop: [],
  };
  runLlama(prompt, llamaParams, "").finally(foldTranscriptIntoPrompt);
};
492
/**
 * Event handler: cancel the in-flight completion, if there is one.
 *
 * @param {Event} e - triggering event; its default action is prevented
 */
const stop = (e) => {
  e.preventDefault();
  const active = controller.value;
  if (!active) {
    return;
  }
  active.abort();
  controller.value = null;
};
500
/**
 * Event handler: stop any running completion and clear the transcript.
 *
 * @param {Event} e - triggering event, forwarded to `stop`
 */
const reset = (e) => {
  stop(e);
  const emptyTranscript = [];
  transcriptUpdate(emptyTranscript);
};
505
/**
 * Event handler: open the hidden file picker and, once a file is chosen,
 * store it as a data URL in the session (for preview) and as a base64
 * payload with id 10 in the request params.
 *
 * The change handler is assigned via `onchange` so that there is only ever
 * one handler, no matter how many times the button is clicked. Registering a
 * new listener on every click would process a single selection several
 * times. The handler is installed before the picker is opened.
 *
 * @param {Event} e - triggering event; its default action is prevented
 */
const uploadImage = (e) => {
  e.preventDefault();
  const fileInput = document.getElementById("fileInput");

  fileInput.onchange = (event) => {
    const selectedFile = event.target.files[0];
    if (!selectedFile) {
      return;
    }

    const reader = new FileReader();
    reader.onload = () => {
      const image_data = reader.result;
      session.value = { ...session.value, image_selected: image_data };
      params.value = {
        ...params.value,
        image_data: [
          // strip the data-URL header so that only the base64 payload is sent
          { data: image_data.replace(/data:image\/[^;]+;base64,/, ''), id: 10 },
        ],
      };
    };
    selected_image = true;
    reader.readAsDataURL(selectedFile);
  };

  fileInput.click();
};
526
/**
 * Chat input area: a textarea plus Send / Upload Image / Stop / Reset
 * buttons. Enter submits, Shift+Enter does not.
 */
function MessageInput() {
  const message = useSignal("");

  // stop any running completion, send the draft, then clear it
  const sendMessage = (e) => {
    stop(e);
    chat(message.value);
    message.value = "";
  };

  const submitOnEnter = (event) => {
    const isPlainEnter = event.which === 13 && !event.shiftKey;
    if (isPlainEnter) {
      sendMessage(event);
    }
  };

  const trackDraft = (e) => {
    message.value = e.target.value;
  };

  return html`
    <form onsubmit=${sendMessage}>
      <div>
        <textarea
          className=${generating.value ? "loading" : null}
          oninput=${trackDraft}
          onkeypress=${submitOnEnter}
          placeholder="Say something..."
          rows=2
          type="text"
          value="${message}"
        />
      </div>
      <div class="right">
        <button type="submit" disabled=${generating.value}>Send</button>
        <button onclick=${uploadImage}>Upload Image</button>
        <button onclick=${stop} disabled=${!generating.value}>Stop</button>
        <button onclick=${reset}>Reset</button>
      </div>
    </form>
  `;
}
564
/**
 * Start / Stop / Reset buttons shown in completion mode.
 */
function CompletionControls() {
  // stop whatever is running before kicking off a fresh completion
  const startCompletion = (e) => {
    stop(e);
    runCompletion();
  };

  return html`
    <div>
      <button onclick=${startCompletion} type="button" disabled=${generating.value}>Start</button>
      <button onclick=${stop} disabled=${!generating.value}>Stop</button>
      <button onclick=${reset}>Reset</button>
    </div>`;
}
577
/**
 * Renders the transcript. In completion mode the text is shown raw inside an
 * editable span; otherwise each entry is rendered through Markdownish. When
 * `n_probs` is positive, generated (array) messages are rendered with
 * per-token probabilities instead.
 */
const ChatLog = (props) => {
  const messages = session.value.transcript;
  const container = useRef(null);

  useEffect(() => {
    // scroll to bottom (if needed)
    const parent = container.current.parentElement;
    if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
      parent.scrollTo(0, parent.scrollHeight)
    }
  }, [messages]);

  const isCompletionMode = session.value.type === 'completion';

  // body of a single transcript entry, without the speaker label
  const renderMessageBody = (data) => {
    const isArrayMessage = Array.isArray(data);
    if (params.value.n_probs > 0 && isArrayMessage) {
      return html`<${Probabilities} data=${data} />`;
    }

    const text = isArrayMessage
      ? data.map(msg => msg.content).join('').replace(/^\s+/, '')
      : data;
    if (isCompletionMode) {
      return text;
    }
    return html`<${Markdownish} text=${template(text)} />`;
  };

  const chatLine = ([user, data], index) => {
    const message = renderMessageBody(data);
    if (user) {
      return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`;
    }
    if (isCompletionMode) {
      return html`<span key=${index}>${message}</span>`;
    }
    return html`<p key=${index}>${message}</p>`;
  };

  // edits in completion mode are written in place on the current session
  const handleCompletionEdit = (e) => {
    session.value.prompt = e.target.innerText;
    session.value.transcript = [];
  };

  return html`
    <div id="chat" ref=${container} key=${messages.length}>
      <img style="width: 60%;${!session.value.image_selected ? `display: none;` : ``}" src="${session.value.image_selected}"/>
      <span contenteditable=${isCompletionMode} ref=${container} oninput=${handleCompletionEdit}>
        ${messages.flatMap(chatLine)}
      </span>
    </div>`;
};
626
/**
 * Settings form shown before a chat has started: mode selection, prompt and
 * templates, grammar (with JSON Schema conversion) and sampling parameters.
 * Every change to session or params is autosaved as a user template.
 */
const ConfigForm = (props) => {
  // Change handlers: each publishes a new object so subscribers are notified.
  const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value };
  const updateParams = (el) => params.value = { ...params.value, [el.target.name]: el.target.value };
  const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) };
  const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) };
  const updateParamsBool = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked };

  // useSignal keeps this value across re-renders. A plain signal('') created
  // here would be replaced by a fresh empty one on every render, silently
  // discarding the property order the user typed.
  const grammarJsonSchemaPropOrder = useSignal('');
  const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value;

  // Treat the grammar field as JSON Schema and replace it with the grammar
  // produced by SchemaConverter; failures are reported via alert().
  const convertJSONSchemaGrammar = async () => {
    try {
      let schema = JSON.parse(params.value.grammar);
      const converter = new SchemaConverter({
        // "a,b,c" -> { a: 0, b: 1, c: 2 }
        prop_order: grammarJsonSchemaPropOrder.value
          .split(',')
          .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
        allow_fetch: true,
      });
      schema = await converter.resolveRefs(schema, 'input');
      converter.visit(schema, '');
      params.value = {
        ...params.value,
        grammar: converter.formatGrammar(),
      };
    } catch (e) {
      alert(`Convert failed: ${e.message}`);
    }
  };

  const FloatField = ({ label, max, min, name, step, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="range" id="${name}" min="${min}" max="${max}" step="${step}" name="${name}" value="${value}" oninput=${updateParamsFloat} />
        <span>${value}</span>
      </div>
    `;
  };

  const IntField = ({ label, max, min, name, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="range" id="${name}" min="${min}" max="${max}" name="${name}" value="${value}" oninput=${updateParamsInt} />
        <span>${value}</span>
      </div>
    `;
  };

  const BoolField = ({ label, name, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="checkbox" id="${name}" name="${name}" checked="${value}" onclick=${updateParamsBool} />
      </div>
    `;
  };

  const userTemplateReset = (e) => {
    e.preventDefault();
    userTemplateResetToDefaultAndApply();
  };

  const UserTemplateResetButton = () => {
    if (selectedUserTemplate.value.name === 'default') {
      return html`
        <button disabled>Using default template</button>
      `;
    }

    return html`
      <button onclick=${userTemplateReset}>Reset all to default</button>
    `;
  };

  useEffect(() => {
    // autosave template on every change
    userTemplateAutosave();
  }, [session.value, params.value]);

  const GrammarControl = () => (
    html`
      <div>
        <label for="template">Grammar</label>
        <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
        <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
        <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
      </div>
    `
  );

  const PromptControlFieldSet = () => (
    html`
      <fieldset>
        <div>
          <label htmlFor="prompt">Prompt</label>
          <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
        </div>
      </fieldset>
    `
  );

  const ChatConfigForm = () => (
    html`
      ${PromptControlFieldSet()}

      <fieldset class="two">
        <div>
          <label for="user">User name</label>
          <input type="text" name="user" value="${session.value.user}" oninput=${updateSession} />
        </div>

        <div>
          <label for="bot">Bot name</label>
          <input type="text" name="char" value="${session.value.char}" oninput=${updateSession} />
        </div>
      </fieldset>

      <fieldset>
        <div>
          <label for="template">Prompt template</label>
          <textarea id="template" name="template" value="${session.value.template}" rows=4 oninput=${updateSession}/>
        </div>

        <div>
          <label for="template">Chat history template</label>
          <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
        </div>
        ${GrammarControl()}
      </fieldset>
    `
  );

  const CompletionConfigForm = () => (
    html`
      ${PromptControlFieldSet()}
      <fieldset>${GrammarControl()}</fieldset>
    `
  );

  return html`
    <form>
      <fieldset class="two">
        <${UserTemplateResetButton}/>
        <div>
          <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
          <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
        </div>
      </fieldset>

      ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}

      <fieldset class="two">
        ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
        ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
        ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
        ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
        ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
        ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
        ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
      </fieldset>
      <details>
        <summary>More options</summary>
        <fieldset class="two">
          ${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
          ${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
          ${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
        </fieldset>
        <hr />
        <fieldset class="three">
          <div>
            <label><input type="radio" name="mirostat" value="0" checked=${params.value.mirostat == 0} oninput=${updateParamsInt} /> no Mirostat</label>
            <label><input type="radio" name="mirostat" value="1" checked=${params.value.mirostat == 1} oninput=${updateParamsInt} /> Mirostat v1</label>
            <label><input type="radio" name="mirostat" value="2" checked=${params.value.mirostat == 2} oninput=${updateParamsInt} /> Mirostat v2</label>
          </div>
          ${FloatField({ label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau })}
          ${FloatField({ label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta })}
        </fieldset>
        <fieldset>
          ${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
        </fieldset>
        <fieldset>
          ${IntField({ label: "Min Probabilities from each Sampler", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
        </fieldset>
        <fieldset>
          <label for="api_key">API Key</label>
          <input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
        </fieldset>
      </details>
    </form>
  `;
}
819
/**
 * Map a probability to a translucent red-to-green background colour:
 * 0 gives rgba(192,0,0,0.3), 1 gives rgba(0,192,0,0.3).
 *
 * The input is clamped to [0, 1] so that a slightly out-of-range value can
 * never produce a negative or oversized colour channel.
 *
 * @param {number} p - probability, nominally in [0, 1]
 * @returns {string} CSS rgba() colour
 */
const probColor = (p) => {
  const clamped = Math.min(1, Math.max(0, p));
  const r = Math.floor(192 * (1 - clamped));
  const g = Math.floor(192 * clamped);
  return `rgba(${r},${g},0,0.3)`;
}
825
/**
 * Renders generated messages with per-token probability information. Each
 * token is wrapped in a Popover whose background reflects the probability of
 * the chosen token and whose content lists the candidates.
 *
 * Messages without probabilities are rendered as plain content. Messages
 * with several probability entries are split into one entry per token and
 * rendered recursively, except when the first entry starts with 'byte: \'.
 */
const Probabilities = (params) => {
  const renderToken = (msg) => {
    const { completion_probabilities } = msg;
    const hasProbabilities = completion_probabilities && completion_probabilities.length !== 0;
    if (!hasProbabilities) {
      return msg.content;
    }

    if (completion_probabilities.length > 1) {
      // Not for byte pair
      if (completion_probabilities[0].content.startsWith('byte: \\')) {
        return msg.content;
      }

      const splitData = completion_probabilities.map(prob => ({
        content: prob.content,
        completion_probabilities: [prob]
      }));
      return html`<${Probabilities} data=${splitData} />`;
    }

    const { probs, content } = completion_probabilities[0];
    const found = probs.find(p => p.tok_str === msg.content);
    const pColor = found ? probColor(found.prob) : 'transparent';

    const candidateRow = (p, index) => html`
      <div
        key=${index}
        title=${`prob: ${p.prob}`}
        style=${{
          padding: '0.3em',
          backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
        }}
      >
        <span>${p.tok_str}: </span>
        <span>${Math.floor(p.prob * 100)}%</span>
      </div>
    `;

    const popoverChildren = html`
      <div class="prob-set">
        ${probs.map((p, index) => candidateRow(p, index))}
      </div>
    `;

    return html`
      <${Popover} style=${{ backgroundColor: pColor }} popoverChildren=${popoverChildren}>
        ${msg.content.match(/\n/gim) ? html`<br />` : msg.content}
      </>
    `;
  };

  return params.data.map((msg) => renderToken(msg));
}
876
877 // poor mans markdown replacement
/**
 * Minimal markdown renderer: headings, bold, italics, fenced and inline code
 * and line breaks.
 *
 * SECURITY: the result is injected with dangerouslySetInnerHTML, so `&`, `<`
 * and `>` MUST be replaced by their HTML entities before any markup is
 * generated. Otherwise text from the model or the user is interpreted as
 * HTML.
 *
 * @param {{text: string}} params - component props; `text` is the raw text
 */
const Markdownish = (params) => {
  const md = params.text
    // escape first: `&` before `<`/`>` so the entities are not double-escaped
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
    .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
    .replace(/__(.*?)__/g, '<strong>$1</strong>')
    .replace(/\*(.*?)\*/g, '<em>$1</em>')
    .replace(/_(.*?)_/g, '<em>$1</em>')
    .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
    .replace(/`(.*?)`/g, '<code>$1</code>')
    .replace(/\n/gim, '<br />');
  return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
};
893
/**
 * Footer line summarising the latest generation statistics; renders an empty
 * span until statistics are available.
 */
const ModelGenerationInfo = (params) => {
  const stats = llamaStats.value;
  if (!stats) {
    return html`<span/>`;
  }

  return html`
    <span>
      ${stats.tokens_predicted} predicted, ${stats.tokens_cached} cached, ${stats.timings.predicted_per_token_ms.toFixed()}ms per token, ${stats.timings.predicted_per_second.toFixed(2)} tokens per second
    </span>
  `;
}
904
905 // simple popover impl
/**
 * Simple popover: clicking the wrapped children toggles a floating panel,
 * rendered through a Portal into "#portal" and positioned below the trigger.
 * A mousedown outside both the trigger and the panel closes it.
 *
 * Props: `style` (applied to the trigger span), `children` (the trigger
 * content) and `popoverChildren` (the panel content).
 */
const Popover = (props) => {
  const isOpen = useSignal(false);
  const position = useSignal({ top: '0px', left: '0px' });
  const buttonRef = useRef(null);
  const popoverRef = useRef(null);

  const togglePopover = () => {
    const trigger = buttonRef.current;
    if (trigger) {
      // anchor the panel to the trigger's bottom-left corner (page coordinates)
      const { bottom, left } = trigger.getBoundingClientRect();
      position.value = {
        top: `${bottom + window.scrollY}px`,
        left: `${left + window.scrollX}px`,
      };
    }
    isOpen.value = !isOpen.value;
  };

  const handleClickOutside = (event) => {
    const panel = popoverRef.current;
    if (!panel) {
      return;
    }
    const clickedInside = panel.contains(event.target) || buttonRef.current.contains(event.target);
    if (!clickedInside) {
      isOpen.value = false;
    }
  };

  useEffect(() => {
    document.addEventListener('mousedown', handleClickOutside);
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, []);

  return html`
    <span style=${props.style} ref=${buttonRef} onClick=${togglePopover}>${props.children}</span>
    ${isOpen.value && html`
      <${Portal} into="#portal">
        <div
          ref=${popoverRef}
          class="popover-content"
          style=${{
            top: position.value.top,
            left: position.value.left,
          }}
        >
          ${props.popoverChildren}
        </div>
      </${Portal}>
    `}
  `;
};
954
955 // Source: preact-portal (https://github.com/developit/preact-portal/blob/master/src/preact-portal.js)
956 /** Redirect rendering of descendants into the given CSS selector */
/**
 * Renders its children into the node designated by the `into` prop (a CSS
 * selector or a node) instead of in place; the component itself renders
 * nothing.
 */
class Portal extends Component {
  // re-render the remote layer (deferred) as soon as any previous prop differs
  componentDidUpdate(props) {
    for (const propName in props) {
      const changed = props[propName] !== this.props[propName];
      if (changed) {
        return setTimeout(this.renderLayer);
      }
    }
  }

  componentDidMount() {
    this.isMounted = true;
    this.renderLayer = this.renderLayer.bind(this);
    this.renderLayer();
  }

  // render an empty layer, then detach the remote node if still attached
  componentWillUnmount() {
    this.renderLayer(false);
    this.isMounted = false;
    const remote = this.remote;
    if (remote && remote.parentNode) {
      remote.parentNode.removeChild(remote);
    }
  }

  // resolve a CSS selector to a node; anything else is returned unchanged
  findNode(node) {
    if (typeof node === 'string') {
      return document.querySelector(node);
    }
    return node;
  }

  renderLayer(show = true) {
    if (!this.isMounted) return;

    // clean up old node if moving bases:
    const target = this.props.into;
    if (target !== this.intoPointer) {
      this.intoPointer = target;
      if (this.into && this.remote) {
        this.remote = render(html`<${PortalProxy} />`, this.into, this.remote);
      }
      this.into = this.findNode(target);
    }

    const content = show && this.props.children || null;
    this.remote = render(html`
      <${PortalProxy} context=${this.context}>
        ${content}
      </${PortalProxy}>
    `, this.into, this.remote);
  }

  render() {
    return null;
  }
}
1005 // high-order component that renders its first child if it exists.
1006 // used as a conditional rendering proxy.
/**
 * Conditional rendering proxy used by Portal: renders its children when
 * there are any (null otherwise) and exposes the `context` prop as child
 * context.
 */
class PortalProxy extends Component {
  getChildContext() {
    const { context } = this.props;
    return context;
  }

  render(props) {
    const { children } = props;
    return children ? children : null;
  }
}
1015
/**
 * Root component: header, input controls for the current mode, the chat log
 * (once a chat has started) or the settings form, and a footer with the
 * generation statistics. A `?q=` query parameter is sent as a chat message
 * once on mount.
 */
function App(props) {
  useEffect(() => {
    const query = new URLSearchParams(location.search).get("q");
    if (query) {
      chat(query);
    }
  }, []);

  const mode = session.value.type;
  const WriteControls = mode === 'chat' ? MessageInput : CompletionControls;
  const MainPane = chatStarted.value ? ChatLog : ConfigForm;

  return html`
    <div class="mode-${mode}">
      <header>
        <img src="llama_cpp.png" style="width:100%"/>
      </header>

      <section id="write">
        <${WriteControls} />
      </section>

      <main id="content">
        <${MainPane} />
      </main>


      <footer>
        <p><${ModelGenerationInfo} /></p>
        <p>Powered by <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
      </footer>
    </div>
  `;
}
1044
// mount the application into the #container element
render(
  h(App),
  document.querySelector('#container'),
);
1046 </script>
1047</head>
1048
1049<body>
1050 <div id="container">
1051 <input type="file" id="fileInput" accept="image/*" style="display: none;">
1052 </div>
1053 <div id="portal"></div>
1054</body>
1055
1056</html>