archive llama.cpp-b8008.tar.gz
corpus lotr.txt map1_bromm.txt map1_dagna.txt map1_keldor.txt map1_skara.txt map1_thrain.txt
llama.cpp
.devops
nix apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix
cann.Dockerfile cpu.Dockerfile cuda-new.Dockerfile cuda.Dockerfile intel.Dockerfile llama-cli-cann.Dockerfile llama-cpp-cuda.srpm.spec llama-cpp.srpm.spec musa.Dockerfile rocm.Dockerfile s390x.Dockerfile tools.sh vulkan.Dockerfile
.gemini settings.json
.github
ISSUE_TEMPLATE 010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.yml
actions
get-tag-name action.yml
install-exe action.yml
linux-setup-spacemit action.yml
linux-setup-vulkan action.yml
unarchive-tar action.yml
windows-setup-cuda action.yml
windows-setup-rocm action.yml
workflows bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.yml
labeler.yml pull_request_template.md
benches
dgx-spark aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json dgx-spark.md
mac-m2-ultra mac-m2-ultra.md
ci README-MUSA.md README.md run.sh
cmake arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmake
common
jinja README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.h
CMakeLists.txt arg.cpp arg.h base64.hpp build-info.cpp.in chat-parser-xml-toolcall.cpp chat-parser-xml-toolcall.h chat-parser.cpp chat-parser.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp chat.h common.cpp common.h console.cpp console.h debug.cpp debug.h download.cpp download.h http.h json-partial.cpp json-partial.h json-schema-to-grammar.cpp json-schema-to-grammar.h llguidance.cpp log.cpp log.h ngram-cache.cpp ngram-cache.h ngram-map.cpp ngram-map.h ngram-mod.cpp ngram-mod.h peg-parser.cpp peg-parser.h preset.cpp preset.h regex-partial.cpp regex-partial.h sampling.cpp sampling.h speculative.cpp speculative.h unicode.cpp unicode.h
docs
android imported-into-android-studio.jpg
backend
VirtGPU configuration.md development.md
snapdragon CMakeUserPresets.json README.md developer.md windows.md
BLIS.md CANN.md CUDA-FEDORA.md OPENCL.md SYCL.md VirtGPU.md ZenDNN.md zDNN.md
development
llama-star idea-arch.key idea-arch.pdf
HOWTO-add-model.md debugging-tests.md parsing.md token_generation_performance_tips.md
multimodal MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.md
ops BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csv
android.md build-riscv64-spacemit.md build-s390x.md build.md docker.md function-calling.md install.md llguidance.md multimodal.md ops.md preset.md speculative.md
examples
batched CMakeLists.txt README.md batched.cpp
batched.swift
Sources main.swift
.gitignore Makefile Package.swift README.md
convert-llama2c-to-ggml CMakeLists.txt README.md convert-llama2c-to-ggml.cpp
debug CMakeLists.txt README.md debug.cpp
deprecation-warning README.md deprecation-warning.cpp
diffusion CMakeLists.txt README.md diffusion-cli.cpp
embedding CMakeLists.txt README.md embedding.cpp
eval-callback CMakeLists.txt README.md eval-callback.cpp
gen-docs CMakeLists.txt gen-docs.cpp
gguf CMakeLists.txt gguf.cpp
gguf-hash
deps
rotate-bits package.json rotate-bits.h
sha1 package.json sha1.c sha1.h
sha256 package.json sha256.c sha256.h
xxhash clib.json xxhash.c xxhash.h
CMakeLists.txt README.md gguf-hash.cpp
idle CMakeLists.txt README.md idle.cpp
llama.android
app
src
main
java
com
example
llama MainActivity.kt MessageAdapter.kt
res
drawable bg_assistant_message.xml bg_user_message.xml ic_launcher_background.xml ic_launcher_foreground.xml outline_folder_open_24.xml outline_send_24.xml
layout activity_main.xml item_message_assistant.xml item_message_user.xml
mipmap-anydpi ic_launcher.xml ic_launcher_round.xml
mipmap-hdpi ic_launcher.webp ic_launcher_round.webp
mipmap-mdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxxhdpi ic_launcher.webp ic_launcher_round.webp
values colors.xml strings.xml themes.xml
xml backup_rules.xml data_extraction_rules.xml
AndroidManifest.xml
.gitignore build.gradle.kts proguard-rules.pro
gradle
wrapper gradle-wrapper.jar gradle-wrapper.properties
libs.versions.toml
lib
src
androidTest
java
android
llama
cpp ExampleInstrumentedTest.kt
main
cpp CMakeLists.txt ai_chat.cpp logging.h
java
com
arm
aichat
gguf FileType.kt GgufMetadata.kt GgufMetadataReader.kt
internal
gguf GgufMetadataReaderImpl.kt
InferenceEngineImpl.kt
AiChat.kt InferenceEngine.kt
AndroidManifest.xml
test
java
android
llama
cpp ExampleUnitTest.kt
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.pro
.gitignore build.gradle.kts gradle.properties gradlew settings.gradle.kts
llama.swiftui
llama.cpp.swift LibLlama.swift
llama.swiftui
Assets.xcassets
AppIcon.appiconset Contents.json
Contents.json
Models LlamaState.swift
Resources
models .gitignore
UI ContentView.swift DownloadButton.swift InputButton.swift LoadCustomButton.swift
llama_swiftuiApp.swift
llama.swiftui.xcodeproj
project.xcworkspace contents.xcworkspacedata
project.pbxproj
.gitignore README.md
lookahead CMakeLists.txt README.md lookahead.cpp
lookup CMakeLists.txt README.md lookup-create.cpp lookup-merge.cpp lookup-stats.cpp lookup.cpp
model-conversion
scripts
causal compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.py
embedding compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.py
utils __init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.py
.gitignore Makefile README.md requirements.txt
parallel CMakeLists.txt README.md parallel.cpp
passkey CMakeLists.txt README.md passkey.cpp
retrieval CMakeLists.txt README.md retrieval.cpp
save-load-state CMakeLists.txt save-load-state.cpp
simple CMakeLists.txt README.md simple.cpp
simple-chat CMakeLists.txt README.md simple-chat.cpp
simple-cmake-pkg .gitignore CMakeLists.txt README.md
speculative CMakeLists.txt README.md speculative.cpp
speculative-simple CMakeLists.txt README.md speculative-simple.cpp
sycl CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.bat
training CMakeLists.txt README.md finetune.cpp
CMakeLists.txt convert_legacy_llama.py json_schema_pydantic_example.py json_schema_to_grammar.py llama.vim pydantic_models_to_grammar.py pydantic_models_to_grammar_examples.py reason-act.sh regex_to_grammar.py server-llama2-13B.sh server_embd.py ts-type-to-grammar.sh
ggml
cmake GitVars.cmake common.cmake ggml-config.cmake.in
include ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.h
src
ggml-blas CMakeLists.txt ggml-blas.cpp
ggml-cann CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cpp
ggml-cpu
amx amx.cpp amx.h common.h mmq.cpp mmq.h
arch
arm cpu-feats.cpp quants.c repack.cpp
loongarch quants.c
powerpc cpu-feats.cpp quants.c
riscv cpu-feats.cpp quants.c repack.cpp
s390 cpu-feats.cpp quants.c
wasm quants.c
x86 cpu-feats.cpp quants.c repack.cpp
cmake FindSIMD.cmake
kleidiai kernels.cpp kernels.h kleidiai.cpp kleidiai.h
llamafile sgemm-ppc.h sgemm.cpp sgemm.h
spacemit ime.cpp ime.h ime1_kernels.cpp ime_kernels.h
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.h
ggml-cuda
template-instances fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cu
vendors cuda.h hip.h musa.h
CMakeLists.txt acc.cu acc.cuh add-id.cu add-id.cuh arange.cu arange.cuh argmax.cu argmax.cuh argsort.cu argsort.cuh binbcast.cu binbcast.cuh clamp.cu clamp.cuh common.cuh concat.cu concat.cuh conv-transpose-1d.cu conv-transpose-1d.cuh conv2d-dw.cu conv2d-dw.cuh conv2d-transpose.cu conv2d-transpose.cuh conv2d.cu conv2d.cuh convert.cu convert.cuh count-equal.cu count-equal.cuh cp-async.cuh cpy-utils.cuh cpy.cu cpy.cuh cross-entropy-loss.cu cross-entropy-loss.cuh cumsum.cu cumsum.cuh dequantize.cuh diag.cu diag.cuh diagmask.cu diagmask.cuh fattn-common.cuh fattn-mma-f16.cuh fattn-tile.cu fattn-tile.cuh fattn-vec.cuh fattn-wmma-f16.cu fattn-wmma-f16.cuh fattn.cu fattn.cuh fill.cu fill.cuh getrows.cu getrows.cuh ggml-cuda.cu gla.cu gla.cuh im2col.cu im2col.cuh mean.cu mean.cuh mma.cuh mmf.cu mmf.cuh mmid.cu mmid.cuh mmq.cu mmq.cuh mmvf.cu mmvf.cuh mmvq.cu mmvq.cuh norm.cu norm.cuh opt-step-adamw.cu opt-step-adamw.cuh opt-step-sgd.cu opt-step-sgd.cuh out-prod.cu out-prod.cuh pad.cu pad.cuh pad_reflect_1d.cu pad_reflect_1d.cuh pool2d.cu pool2d.cuh quantize.cu quantize.cuh reduce_rows.cuh roll.cu roll.cuh rope.cu rope.cuh scale.cu scale.cuh set-rows.cu set-rows.cuh set.cu set.cuh softcap.cu softcap.cuh softmax.cu softmax.cuh solve_tri.cu solve_tri.cuh ssm-conv.cu ssm-conv.cuh ssm-scan.cu ssm-scan.cuh sum.cu sum.cuh sumrows.cu sumrows.cuh top-k.cu top-k.cuh topk-moe.cu topk-moe.cuh tri.cu tri.cuh tsembd.cu tsembd.cuh unary.cu unary.cuh upscale.cu upscale.cuh vecdotq.cuh wkv.cu wkv.cuh
ggml-hexagon
htp CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.h
CMakeLists.txt ggml-hexagon.cpp htp-drv.cpp htp-drv.h libdl.h libggml-htp.inf op-desc.h
ggml-hip CMakeLists.txt
ggml-metal CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metal
ggml-musa CMakeLists.txt mudnn.cu mudnn.cuh
ggml-opencl
kernels add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.cl
CMakeLists.txt ggml-opencl.cpp
ggml-rpc CMakeLists.txt ggml-rpc.cpp
ggml-sycl
dpct helper.hpp
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hpp
ggml-virtgpu
backend
shared api_remoting.h apir_backend.gen.h apir_backend.h apir_cs.h apir_cs_ggml.h apir_cs_rpc.h
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cpp
include apir_hw.h
CMakeLists.txt apir_cs_ggml-rpc-front.cpp ggml-backend-buffer-type.cpp ggml-backend-buffer.cpp ggml-backend-device.cpp ggml-backend-reg.cpp ggml-backend.cpp ggml-remoting.h ggmlremoting_functions.yaml regenerate_remoting.py virtgpu-apir.h virtgpu-forward-backend.cpp virtgpu-forward-buffer-type.cpp virtgpu-forward-buffer.cpp virtgpu-forward-device.cpp virtgpu-forward-impl.h virtgpu-forward.gen.h virtgpu-shm.cpp virtgpu-shm.h virtgpu-utils.cpp virtgpu-utils.h virtgpu.cpp virtgpu.h
ggml-vulkan
cmake host-toolchain.cmake.in
vulkan-shaders
feature-tests bfloat16.comp coopmat.comp coopmat2.comp integer_dot.comp
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.comp
CMakeLists.txt ggml-vulkan.cpp
ggml-webgpu
wgsl-shaders argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgsl
CMakeLists.txt ggml-webgpu-shader-lib.hpp ggml-webgpu.cpp pre_wgsl.hpp
ggml-zdnn .gitignore CMakeLists.txt common.hpp ggml-zdnn.cpp mmf.cpp mmf.hpp utils.cpp utils.hpp
ggml-zendnn CMakeLists.txt ggml-zendnn.cpp
CMakeLists.txt ggml-alloc.c ggml-backend-dl.cpp ggml-backend-dl.h ggml-backend-impl.h ggml-backend-reg.cpp ggml-backend.cpp ggml-common.h ggml-impl.h ggml-opt.cpp ggml-quants.c ggml-quants.h ggml-threading.cpp ggml-threading.h ggml.c ggml.cpp gguf.cpp
.gitignore CMakeLists.txt
gguf-py
examples reader.py writer.py
gguf
scripts gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.py
__init__.py constants.py gguf.py gguf_reader.py gguf_writer.py lazy.py metadata.py py.typed quants.py tensor_mapping.py utility.py vocab.py
tests __init__.py test_metadata.py test_quants.py
LICENSE README.md pyproject.toml
grammars README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnf
include llama-cpp.h llama.h
licenses LICENSE-jsonhpp
media llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svg
models
templates Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinja
.editorconfig ggml-vocab-aquila.gguf ggml-vocab-baichuan.gguf ggml-vocab-bert-bge.gguf ggml-vocab-bert-bge.gguf.inp ggml-vocab-bert-bge.gguf.out ggml-vocab-command-r.gguf ggml-vocab-command-r.gguf.inp ggml-vocab-command-r.gguf.out ggml-vocab-deepseek-coder.gguf ggml-vocab-deepseek-coder.gguf.inp ggml-vocab-deepseek-coder.gguf.out ggml-vocab-deepseek-llm.gguf ggml-vocab-deepseek-llm.gguf.inp ggml-vocab-deepseek-llm.gguf.out ggml-vocab-falcon.gguf ggml-vocab-falcon.gguf.inp ggml-vocab-falcon.gguf.out ggml-vocab-gpt-2.gguf ggml-vocab-gpt-2.gguf.inp ggml-vocab-gpt-2.gguf.out ggml-vocab-gpt-neox.gguf ggml-vocab-llama-bpe.gguf ggml-vocab-llama-bpe.gguf.inp ggml-vocab-llama-bpe.gguf.out ggml-vocab-llama-spm.gguf ggml-vocab-llama-spm.gguf.inp ggml-vocab-llama-spm.gguf.out ggml-vocab-mpt.gguf ggml-vocab-mpt.gguf.inp ggml-vocab-mpt.gguf.out ggml-vocab-nomic-bert-moe.gguf ggml-vocab-phi-3.gguf ggml-vocab-phi-3.gguf.inp ggml-vocab-phi-3.gguf.out ggml-vocab-qwen2.gguf ggml-vocab-qwen2.gguf.inp ggml-vocab-qwen2.gguf.out ggml-vocab-refact.gguf ggml-vocab-refact.gguf.inp ggml-vocab-refact.gguf.out ggml-vocab-starcoder.gguf ggml-vocab-starcoder.gguf.inp ggml-vocab-starcoder.gguf.out
pocs
vdot CMakeLists.txt q8dot.cpp vdot.cpp
CMakeLists.txt
requirements requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txt
scripts
apple validate-apps.sh validate-ios.sh validate-macos.sh validate-tvos.sh validate-visionos.sh
jinja jinja-tester.py requirements.txt
snapdragon
adb llama-cli.farf run-bench.sh run-cli.sh run-completion.sh run-mtmd.sh run-tool.sh
qdc
tests test_bench.py
readme.md requirements.txt
windows run-bench.ps1 run-cli.ps1 run-tool.ps1 setup-build.ps1
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmake
src
models afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpp
CMakeLists.txt llama-adapter.cpp llama-adapter.h llama-arch.cpp llama-arch.h llama-batch.cpp llama-batch.h llama-chat.cpp llama-chat.h llama-context.cpp llama-context.h llama-cparams.cpp llama-cparams.h llama-grammar.cpp llama-grammar.h llama-graph.cpp llama-graph.h llama-hparams.cpp llama-hparams.h llama-impl.cpp llama-impl.h llama-io.cpp llama-io.h llama-kv-cache-iswa.cpp llama-kv-cache-iswa.h llama-kv-cache.cpp llama-kv-cache.h llama-kv-cells.h llama-memory-hybrid-iswa.cpp llama-memory-hybrid-iswa.h llama-memory-hybrid.cpp llama-memory-hybrid.h llama-memory-recurrent.cpp llama-memory-recurrent.h llama-memory.cpp llama-memory.h llama-mmap.cpp llama-mmap.h llama-model-loader.cpp llama-model-loader.h llama-model-saver.cpp llama-model-saver.h llama-model.cpp llama-model.h llama-quant.cpp llama-quant.h llama-sampler.cpp llama-sampler.h llama-vocab.cpp llama-vocab.h llama.cpp unicode-data.cpp unicode-data.h unicode.cpp unicode.h
tests
peg-parser simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.h
.gitignore CMakeLists.txt get-model.cpp get-model.h run-json-schema-to-grammar.mjs test-alloc.cpp test-arg-parser.cpp test-autorelease.cpp test-backend-ops.cpp test-backend-sampler.cpp test-barrier.cpp test-c.c test-chat-parser.cpp test-chat-peg-parser.cpp test-chat-template.cpp test-chat.cpp test-double-float.cpp test-gbnf-validator.cpp test-gguf.cpp test-grammar-integration.cpp test-grammar-llguidance.cpp test-grammar-parser.cpp test-jinja.cpp test-json-partial.cpp test-json-schema-to-grammar.cpp test-llama-grammar.cpp test-log.cpp test-lora-conversion-inference.sh test-model-load-cancel.cpp test-mtmd-c-api.c test-opt.cpp test-peg-parser.cpp test-quantize-fns.cpp test-quantize-perf.cpp test-quantize-stats.cpp test-regex-partial.cpp test-rope.cpp test-sampling.cpp test-state-restore-fragmented.cpp test-thread-safety.cpp test-tokenizer-0.cpp test-tokenizer-0.py test-tokenizer-0.sh test-tokenizer-1-bpe.cpp test-tokenizer-1-spm.cpp test-tokenizer-random.py test-tokenizers-repo.sh testing.h
tools
batched-bench CMakeLists.txt README.md batched-bench.cpp
cli CMakeLists.txt README.md cli.cpp
completion CMakeLists.txt README.md completion.cpp
cvector-generator CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txt
export-lora CMakeLists.txt README.md export-lora.cpp
fit-params CMakeLists.txt README.md fit-params.cpp
gguf-split CMakeLists.txt README.md gguf-split.cpp tests.sh
imatrix CMakeLists.txt README.md imatrix.cpp
llama-bench CMakeLists.txt README.md llama-bench.cpp
mtmd
legacy-models convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.py
models cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cpp
CMakeLists.txt README.md clip-graph.h clip-impl.h clip-model.h clip.cpp clip.h deprecation-warning.cpp mtmd-audio.cpp mtmd-audio.h mtmd-cli.cpp mtmd-helper.cpp mtmd-helper.h mtmd.cpp mtmd.h requirements.txt test-1.jpeg test-2.mp3 tests.sh
perplexity CMakeLists.txt README.md perplexity.cpp
quantize CMakeLists.txt README.md quantize.cpp tests.sh
rpc CMakeLists.txt README.md rpc-server.cpp
server
bench README.md bench.py prometheus.yml requirements.txt script.js
public index.html.gz loading.html
public_legacy colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.css
public_simplechat datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjs
tests
unit test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.py
.gitignore README.md conftest.py pytest.ini requirements.txt tests.sh utils.py
themes
buttons-top README.md buttons_top.png favicon.ico index.html
wild README.md favicon.ico index.html llama_cpp.png llamapattern.png wild.png
README.md
webui
.storybook ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.ts
docs
architecture high-level-architecture-simplified.md high-level-architecture.md
flows chat-flow.md conversations-flow.md data-flow-simplified-model-mode.md data-flow-simplified-router-mode.md database-flow.md models-flow.md server-flow.md settings-flow.md
scripts dev.sh install-git-hooks.sh post-build.sh
src
lib
components
app
chat
ChatAttachments ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelte
ChatForm
ChatFormActions ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelte
ChatForm.svelte ChatFormFileInputInvisible.svelte ChatFormHelperText.svelte ChatFormTextarea.svelte
ChatMessages ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelte
ChatScreen ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.svelte
ChatSettings ChatSettings.svelte ChatSettingsFields.svelte ChatSettingsFooter.svelte ChatSettingsImportExportTab.svelte ChatSettingsParameterSourceIndicator.svelte
ChatSidebar ChatSidebar.svelte ChatSidebarActions.svelte ChatSidebarConversationItem.svelte ChatSidebarSearch.svelte handle-mobile-sidebar-item-click.ts
dialogs DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.svelte
misc ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelte
models ModelBadge.svelte ModelsSelector.svelte
server ServerErrorSplash.svelte ServerLoadingSplash.svelte ServerStatus.svelte
index.ts
ui
alert alert-description.svelte alert-title.svelte alert.svelte index.ts
alert-dialog alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.ts
badge badge.svelte index.ts
button button.svelte index.ts
card card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.ts
checkbox checkbox.svelte index.ts
collapsible collapsible-content.svelte collapsible-trigger.svelte collapsible.svelte index.ts
dialog dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.ts
dropdown-menu dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.ts
input index.ts input.svelte
label index.ts label.svelte
popover index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelte
scroll-area index.ts scroll-area-scrollbar.svelte scroll-area.svelte
select index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.svelte
separator index.ts separator.svelte
sheet index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.svelte
sidebar constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.svelte
skeleton index.ts skeleton.svelte
switch index.ts switch.svelte
table index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelte
textarea index.ts textarea.svelte
tooltip index.ts tooltip-content.svelte tooltip-trigger.svelte
utils.ts
constants auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.ts
enums attachment.ts chat.ts files.ts index.ts model.ts server.ts
hooks is-mobile.svelte.ts use-model-change-validation.svelte.ts use-processing-state.svelte.ts
markdown enhance-code-blocks.ts enhance-links.ts literal-html.ts table-html-restorer.ts
services chat.ts database.ts index.ts models.ts parameter-sync.spec.ts parameter-sync.ts props.ts
stores chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.ts
types api.d.ts chat.d.ts database.d.ts index.ts models.d.ts settings.d.ts
utils api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.ts
routes
chat
[id] +page.svelte +page.ts
+error.svelte +layout.svelte +page.svelte +page.ts
styles katex-custom.scss
app.css app.d.ts app.html
static favicon.svg loading.html
tests
client
components TestWrapper.svelte
page.svelte.test.ts
e2e demo.test.ts
stories
fixtures
assets 1.jpg beautiful-flowers-lotus.webp example.pdf hf-logo.svg
ai-tutorial.ts api-docs.ts blog-post.ts data-analysis.ts empty.ts math-formulas.ts readme.ts storybook-mocks.ts
ChatForm.stories.svelte ChatMessage.stories.svelte ChatSettings.stories.svelte ChatSidebar.stories.svelte Introduction.mdx MarkdownContent.stories.svelte
unit clipboard.test.ts latex-protection.test.ts model-names.test.ts
.gitignore .npmrc .prettierignore .prettierrc README.md components.json eslint.config.js package-lock.json package.json playwright.config.ts svelte.config.js tsconfig.json vite.config.ts vitest-setup-client.ts
CMakeLists.txt README-dev.md README.md chat-llama2.sh chat.mjs chat.sh server-common.cpp server-common.h server-context.cpp server-context.h server-http.cpp server-http.h server-models.cpp server-models.h server-queue.cpp server-queue.h server-task.cpp server-task.h server.cpp
tokenize CMakeLists.txt tokenize.cpp
tts CMakeLists.txt README.md convert_pt_to_hf.py tts-outetts.py tts.cpp
CMakeLists.txt
vendor
cpp-httplib CMakeLists.txt LICENSE httplib.cpp httplib.h
miniaudio miniaudio.h
nlohmann json.hpp json_fwd.hpp
sheredom subprocess.h
stb stb_image.h
.clang-format .clang-tidy .dockerignore .ecrc .editorconfig .flake8 .gitignore .gitmodules .pre-commit-config.yaml AGENTS.md AUTHORS CLAUDE.md CMakeLists.txt CMakePresets.json CODEOWNERS CONTRIBUTING.md LICENSE Makefile README.md SECURITY.md convert_hf_to_gguf.py convert_hf_to_gguf_update.py convert_llama_ggml_to_gguf.py convert_lora_to_gguf.py flake.lock flake.nix mypy.ini poetry.lock pyproject.toml pyrightconfig.json requirements.txt
maps map1.h map1.txt
papers 2310.11703v2.pdf 2405.14159v2.pdf
prompts lotr.h lotr.txt
.gitignore Dockerfile Makefile README.md compile_flags.txt context.c game.c makext.mk mapeditor.html maps.h minunit.h models.h models.txt nonstd.h npc.c termbox2.h vectordb.c vectordb.h
llama.cpp/tools/server/themes/buttons-top/index.html raw
   1<html>
   2
   3<head>
   4  <meta charset="UTF-8">
   5  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
   6  <meta name="color-scheme" content="light dark">
   7  <title>llama.cpp - chat</title>
   8
   9  <style>
  10    body {
  11      font-family: system-ui;
  12      font-size: 90%;
  13    }
  14
  15    #container {
  16      margin: 0em auto;
  17      display: flex;
  18      flex-direction: column;
  19      justify-content: space-between;
  20      height: 100%;
  21    }
  22
  23    main {
  24      margin: 3px;
  25      display: flex;
  26      flex-direction: column;
  27      justify-content: space-between;
  28      gap: 1em;
  29
  30      flex-grow: 1;
  31      overflow-y: auto;
  32
  33      border: 1px solid #ccc;
  34      border-radius: 5px;
  35      padding: 0.5em;
  36    }
  37
  38    body {
  39      max-width: 600px;
  40      min-width: 300px;
  41      line-height: 1.2;
  42      margin: 0 auto;
  43      padding: 0 0.5em;
  44    }
  45
  46    p {
  47      overflow-wrap: break-word;
  48      word-wrap: break-word;
  49      hyphens: auto;
  50      margin-top: 0.5em;
  51      margin-bottom: 0.5em;
  52    }
  53
  54    #write form {
  55      margin: 1em 0 0 0;
  56      display: flex;
  57      flex-direction: column;
  58      gap: 0.5em;
  59      align-items: stretch;
  60    }
  61
  62    .right {
  63      display: flex;
  64      flex-direction: row;
  65      gap: 0.5em;
  66      justify-content: flex-end;
  67    }
  68
  69    fieldset {
  70      border: none;
  71      padding: 0;
  72      margin: 0;
  73    }
  74
  75    fieldset.two {
  76      display: grid;
  77      grid-template: "a a";
  78      gap: 1em;
  79    }
  80
  81    fieldset.three {
  82      display: grid;
  83      grid-template: "a a a";
  84      gap: 1em;
  85    }
  86
  87    details {
  88      border: 1px solid #aaa;
  89      border-radius: 4px;
  90      padding: 0.5em 0.5em 0;
  91      margin-top: 0.5em;
  92    }
  93
  94    summary {
  95      font-weight: bold;
  96      margin: -0.5em -0.5em 0;
  97      padding: 0.5em;
  98      cursor: pointer;
  99    }
 100
 101    details[open] {
 102      padding: 0.5em;
 103    }
 104
 105    .prob-set {
 106      padding: 0.3em;
 107      border-bottom: 1px solid #ccc;
 108    }
 109
 110    .popover-content {
 111      position: absolute;
 112      background-color: white;
 113      padding: 0.2em;
 114      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
 115    }
 116
 117    textarea {
 118      padding: 5px;
 119      flex-grow: 1;
 120      width: 100%;
 121    }
 122
 123    pre code {
 124      display: block;
 125      background-color: #222;
 126      color: #ddd;
 127    }
 128
 129    code {
 130      font-family: monospace;
 131      padding: 0.1em 0.3em;
 132      border-radius: 3px;
 133    }
 134
 135    fieldset label {
 136      margin: 0.5em 0;
 137      display: block;
 138    }
 139
 140    fieldset label.slim {
 141      margin: 0 0.5em;
 142      display: inline;
 143    }
 144
 145    header,
 146    footer {
 147      text-align: center;
 148    }
 149
 150    footer {
 151      font-size: 80%;
 152      color: #888;
 153    }
 154
 155    .mode-chat textarea[name=prompt] {
 156      height: 4.5em;
 157    }
 158
 159    .mode-completion textarea[name=prompt] {
 160      height: 10em;
 161    }
 162
 163    [contenteditable] {
 164      display: inline-block;
 165      white-space: pre-wrap;
 166      outline: 0px solid transparent;
 167    }
 168
 169    @keyframes loading-bg-wipe {
 170      0% {
 171        background-position: 0%;
 172      }
 173
 174      100% {
 175        background-position: 100%;
 176      }
 177    }
 178
 179    .loading {
 180      --loading-color-1: #eeeeee00;
 181      --loading-color-2: #eeeeeeff;
 182      background-size: 50% 100%;
 183      background-image: linear-gradient(90deg, var(--loading-color-1), var(--loading-color-2), var(--loading-color-1));
 184      animation: loading-bg-wipe 2s linear infinite;
 185    }
 186
 187    @media (prefers-color-scheme: dark) {
 188      .loading {
 189        --loading-color-1: #22222200;
 190        --loading-color-2: #222222ff;
 191      }
 192
 193      .popover-content {
 194        background-color: black;
 195      }
 196    }
 197  </style>
 198
 199  <script type="module">
 200    import {
 201      html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
 202    } from './index.js';
 203
 204    import { llama } from './completion.js';
 205    import { SchemaConverter } from './json-schema-to-grammar.mjs';
 206    let selected_image = false;
 207    var slot_id = -1;
 208
 209    const session = signal({
 210      prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",
 211      template: "{{prompt}}\n\n{{history}}\n{{char}}:",
 212      historyTemplate: "{{name}}: {{message}}",
 213      transcript: [],
 214      type: "chat",  // "chat" | "completion"
 215      char: "Llama",
 216      user: "User",
 217      image_selected: ''
 218    })
 219
 220    const params = signal({
 221      n_predict: 400,
 222      temperature: 0.7,
 223      repeat_last_n: 256, // 0 = disable penalty, -1 = context size
 224      repeat_penalty: 1.18, // 1.0 = disabled
 225      top_k: 40, // <= 0 to use vocab size
 226      top_p: 0.95, // 1.0 = disabled
 227      min_p: 0.05, // 0 = disabled
 228      typical_p: 1.0, // 1.0 = disabled
 229      presence_penalty: 0.0, // 0.0 = disabled
 230      frequency_penalty: 0.0, // 0.0 = disabled
 231      mirostat: 0, // 0/1/2
 232      mirostat_tau: 5, // target entropy
 233      mirostat_eta: 0.1, // learning rate
 234      grammar: '',
 235      n_probs: 0, // no completion_probabilities,
 236      min_keep: 0, // min probs from each sampler,
 237      image_data: [],
 238      cache_prompt: true,
 239      api_key: ''
 240    })
 241
 242    /* START: Support for storing prompt templates and parameters in browsers LocalStorage */
 243
 244    const local_storage_storageKey = "llamacpp_server_local_storage";
 245
 246    function local_storage_setDataFromObject(tag, content) {
 247      localStorage.setItem(local_storage_storageKey + '/' + tag, JSON.stringify(content));
 248    }
 249
 250    function local_storage_setDataFromRawText(tag, content) {
 251      localStorage.setItem(local_storage_storageKey + '/' + tag, content);
 252    }
 253
 254    function local_storage_getDataAsObject(tag) {
 255      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
 256      if (!item) {
 257        return null;
 258      } else {
 259        return JSON.parse(item);
 260      }
 261    }
 262
 263    function local_storage_getDataAsRawText(tag) {
 264      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
 265      if (!item) {
 266        return null;
 267      } else {
 268        return item;
 269      }
 270    }
 271
 272    // create a container for user templates and settings
 273
 274    const savedUserTemplates = signal({})
 275    const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } })
 276
 277    // let's import locally saved templates and settings if there are any
 278    // user templates and settings are stored in one object
 279    // in form of { "templatename": "templatedata" } and { "settingstemplatename":"settingsdata" }
 280
 281    console.log('Importing saved templates')
 282
 283    let importedTemplates = local_storage_getDataAsObject('user_templates')
 284
 285    if (importedTemplates) {
 286      // saved templates were successfully imported.
 287
 288      console.log('Processing saved templates and updating default template')
 289      params.value = { ...params.value, image_data: [] };
 290
 291      //console.log(importedTemplates);
 292      savedUserTemplates.value = importedTemplates;
 293
 294      //override default template
 295      savedUserTemplates.value.default = { session: session.value, params: params.value }
 296      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
 297    } else {
 298      // no saved templates detected.
 299
 300      console.log('Initializing LocalStorage and saving default template')
 301
 302      savedUserTemplates.value = { "default": { session: session.value, params: params.value } }
 303      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
 304    }
 305
 306    function userTemplateResetToDefault() {
 307      console.log('Resetting template to default')
 308      selectedUserTemplate.value.name = 'default';
 309      selectedUserTemplate.value.data = savedUserTemplates.value['default'];
 310    }
 311
 312    function userTemplateApply(t) {
 313      session.value = t.data.session;
 314      session.value = { ...session.value, image_selected: '' };
 315      params.value = t.data.params;
 316      params.value = { ...params.value, image_data: [] };
 317    }
 318
 319    function userTemplateResetToDefaultAndApply() {
 320      userTemplateResetToDefault()
 321      userTemplateApply(selectedUserTemplate.value)
 322    }
 323
 324    function userTemplateLoadAndApplyAutosaved() {
 325      // get autosaved last used template
 326      let lastUsedTemplate = local_storage_getDataAsObject('user_templates_last')
 327
 328      if (lastUsedTemplate) {
 329
 330        console.log('Autosaved template found, restoring')
 331
 332        selectedUserTemplate.value = lastUsedTemplate
 333      }
 334      else {
 335
 336        console.log('No autosaved template found, using default template')
 337        // no autosaved last used template was found, so load from default.
 338
 339        userTemplateResetToDefault()
 340      }
 341
 342      console.log('Applying template')
 343      // and update internal data from templates
 344
 345      userTemplateApply(selectedUserTemplate.value)
 346    }
 347
 348    //console.log(savedUserTemplates.value)
 349    //console.log(selectedUserTemplate.value)
 350
 351    function userTemplateAutosave() {
 352      console.log('Template Autosave...')
 353      if (selectedUserTemplate.value.name == 'default') {
 354        // we don't want to save over default template, so let's create a new one
 355        let newTemplateName = 'UserTemplate-' + Date.now().toString()
 356        let newTemplate = { 'name': newTemplateName, 'data': { 'session': session.value, 'params': params.value } }
 357
 358        console.log('Saving as ' + newTemplateName)
 359
 360        // save in the autosave slot
 361        local_storage_setDataFromObject('user_templates_last', newTemplate)
 362
 363        // and load it back and apply
 364        userTemplateLoadAndApplyAutosaved()
 365      } else {
 366        local_storage_setDataFromObject('user_templates_last', { 'name': selectedUserTemplate.value.name, 'data': { 'session': session.value, 'params': params.value } })
 367      }
 368    }
 369
 370    console.log('Checking for autosaved last used template')
 371    userTemplateLoadAndApplyAutosaved()
 372
 373    /* END: Support for storing prompt templates and parameters in browsers LocalStorage */
 374
 375    const llamaStats = signal(null)
 376    const controller = signal(null)
 377
 378    // currently generating a completion?
 379    const generating = computed(() => controller.value != null)
 380
 381    // has the user started a chat?
 382    const chatStarted = computed(() => session.value.transcript.length > 0)
 383
 384    const transcriptUpdate = (transcript) => {
 385      session.value = {
 386        ...session.value,
 387        transcript
 388      }
 389    }
 390
 391    // simple template replace
 392    const template = (str, extraSettings) => {
 393      let settings = session.value;
 394      if (extraSettings) {
 395        settings = { ...settings, ...extraSettings };
 396      }
 397      return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
 398    }
 399
 400    async function runLlama(prompt, llamaParams, char) {
 401      const currentMessages = [];
 402      const history = session.value.transcript;
 403      if (controller.value) {
 404        throw new Error("already running");
 405      }
 406      controller.value = new AbortController();
 407      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value, api_url: location.pathname.replace(/\/+$/, '') })) {
 408        const data = chunk.data;
 409
 410        if (data.stop) {
 411          while (
 412            currentMessages.length > 0 &&
 413            currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
 414          ) {
 415            currentMessages.pop();
 416          }
 417          transcriptUpdate([...history, [char, currentMessages]])
 418          console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
 419        } else {
 420          currentMessages.push(data);
 421          slot_id = data.slot_id;
 422          if (selected_image && !data.multimodal) {
 423            alert("The server was not compiled for multimodal or the model projector can't be loaded.");
 424            return;
 425          }
 426          transcriptUpdate([...history, [char, currentMessages]])
 427        }
 428
 429        if (data.timings) {
 430          llamaStats.value = data;
 431        }
 432      }
 433
 434      controller.value = null;
 435    }
 436
 437    // send message to server
 438    const chat = async (msg) => {
 439      if (controller.value) {
 440        console.log('already running...');
 441        return;
 442      }
 443
 444      transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 445
 446      let prompt = template(session.value.template, {
 447        message: msg,
 448        history: session.value.transcript.flatMap(
 449          ([name, data]) =>
 450            template(
 451              session.value.historyTemplate,
 452              {
 453                name,
 454                message: Array.isArray(data) ?
 455                  data.map(msg => msg.content).join('').replace(/^\s/, '') :
 456                  data,
 457              }
 458            )
 459        ).join("\n"),
 460      });
 461      if (selected_image) {
 462        prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
 463      }
 464      await runLlama(prompt, {
 465        ...params.value,
 466        slot_id: slot_id,
 467        stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
 468      }, "{{char}}");
 469    }
 470
 471    const runCompletion = () => {
 472      if (controller.value) {
 473        console.log('already running...');
 474        return;
 475      }
 476      const { prompt } = session.value;
 477      transcriptUpdate([...session.value.transcript, ["", prompt]]);
 478      runLlama(prompt, {
 479        ...params.value,
 480        slot_id: slot_id,
 481        stop: [],
 482      }, "").finally(() => {
 483        session.value.prompt = session.value.transcript.map(([_, data]) =>
 484          Array.isArray(data) ? data.map(msg => msg.content).join('') : data
 485        ).join('');
 486        session.value.transcript = [];
 487      })
 488    }
 489
 490    const stop = (e) => {
 491      e.preventDefault();
 492      if (controller.value) {
 493        controller.value.abort();
 494        controller.value = null;
 495      }
 496    }
 497
 498    const reset = (e) => {
 499      stop(e);
 500      transcriptUpdate([]);
 501    }
 502
 503    const uploadImage = (e) => {
 504      e.preventDefault();
 505      document.getElementById("fileInput").click();
 506      document.getElementById("fileInput").addEventListener("change", function (event) {
 507        const selectedFile = event.target.files[0];
 508        if (selectedFile) {
 509          const reader = new FileReader();
 510          reader.onload = function () {
 511            const image_data = reader.result;
 512            session.value = { ...session.value, image_selected: image_data };
 513            params.value = {
 514              ...params.value, image_data: [
 515                { data: image_data.replace(/data:image\/[^;]+;base64,/, ''), id: 10 }]
 516            }
 517          };
 518          selected_image = true;
 519          reader.readAsDataURL(selectedFile);
 520        }
 521      });
 522    }
 523
 524    function MessageInput() {
 525      const message = useSignal("")
 526
 527      const submit = (e) => {
 528        stop(e);
 529        chat(message.value);
 530        message.value = "";
 531      }
 532
 533      const enterSubmits = (event) => {
 534        if (event.which === 13 && !event.shiftKey) {
 535          submit(event);
 536        }
 537      }
 538
 539      return html`
 540        <form onsubmit=${submit}>
 541          <div>
 542            <textarea
 543               className=${generating.value ? "loading" : null}
 544               oninput=${(e) => message.value = e.target.value}
 545               onkeypress=${enterSubmits}
 546               placeholder="Say something..."
 547               rows=2
 548               type="text"
 549               value="${message}"
 550            />
 551          </div>
 552          <div class="right">
 553            <button type="submit" disabled=${generating.value}>Send</button>
 554            <button onclick=${uploadImage}>Upload Image</button>
 555            <button onclick=${stop} disabled=${!generating.value}>Stop</button>
 556            <button onclick=${reset}>Reset</button>
 557          </div>
 558        </form>
 559      `
 560    }
 561
 562    function CompletionControls() {
 563      const submit = (e) => {
 564        stop(e);
 565        runCompletion();
 566      }
 567      return html`
 568        <div>
 569          <button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
 570          <button onclick=${stop} disabled=${!generating.value}>Stop</button>
 571          <button onclick=${reset}>Reset</button>
 572        </div>`;
 573    }
 574
 575    const ChatLog = (props) => {
 576      const messages = session.value.transcript;
 577      const container = useRef(null)
 578
 579      useEffect(() => {
 580        // scroll to bottom (if needed)
 581        const parent = container.current.parentElement;
 582        if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
 583          parent.scrollTo(0, parent.scrollHeight)
 584        }
 585      }, [messages])
 586
 587      const isCompletionMode = session.value.type === 'completion'
 588      const chatLine = ([user, data], index) => {
 589        let message
 590        const isArrayMessage = Array.isArray(data)
 591        if (params.value.n_probs > 0 && isArrayMessage) {
 592          message = html`<${Probabilities} data=${data} />`
 593        } else {
 594          const text = isArrayMessage ?
 595            data.map(msg => msg.content).join('').replace(/^\s+/, '') :
 596            data;
 597          message = isCompletionMode ?
 598            text :
 599            html`<${Markdownish} text=${template(text)} />`
 600        }
 601        if (user) {
 602          return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
 603        } else {
 604          return isCompletionMode ?
 605            html`<span key=${index}>${message}</span>` :
 606            html`<p key=${index}>${message}</p>`
 607        }
 608      };
 609
 610      const handleCompletionEdit = (e) => {
 611        session.value.prompt = e.target.innerText;
 612        session.value.transcript = [];
 613      }
 614
 615      return html`
 616        <div id="chat" ref=${container} key=${messages.length}>
 617          <img style="width: 60%;${!session.value.image_selected ? `display: none;` : ``}" src="${session.value.image_selected}"/>
 618          <span contenteditable=${isCompletionMode} ref=${container} oninput=${handleCompletionEdit}>
 619            ${messages.flatMap(chatLine)}
 620          </span>
 621        </div>`;
 622    };
 623
 624    const ConfigForm = (props) => {
 625      const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value }
 626      const updateParams = (el) => params.value = { ...params.value, [el.target.name]: el.target.value }
 627      const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
 628      const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }
 629      const updateParamsBool = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked }
 630
 631      const grammarJsonSchemaPropOrder = signal('')
 632      const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
 633      const convertJSONSchemaGrammar = async () => {
 634        try {
 635          let schema = JSON.parse(params.value.grammar)
 636          const converter = new SchemaConverter({
 637            prop_order: grammarJsonSchemaPropOrder.value
 638              .split(',')
 639              .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
 640            allow_fetch: true,
 641          })
 642          schema = await converter.resolveRefs(schema, 'input')
 643          converter.visit(schema, '')
 644          params.value = {
 645            ...params.value,
 646            grammar: converter.formatGrammar(),
 647          }
 648        } catch (e) {
 649          alert(`Convert failed: ${e.message}`)
 650        }
 651      }
 652
 653      const FloatField = ({ label, max, min, name, step, value }) => {
 654        return html`
 655          <div>
 656            <label for="${name}">${label}</label>
 657            <input type="range" id="${name}" min="${min}" max="${max}" step="${step}" name="${name}" value="${value}" oninput=${updateParamsFloat} />
 658            <span>${value}</span>
 659          </div>
 660        `
 661      };
 662
 663      const IntField = ({ label, max, min, name, value }) => {
 664        return html`
 665          <div>
 666            <label for="${name}">${label}</label>
 667            <input type="range" id="${name}" min="${min}" max="${max}" name="${name}" value="${value}" oninput=${updateParamsInt} />
 668            <span>${value}</span>
 669          </div>
 670        `
 671      };
 672
 673      const BoolField = ({ label, name, value }) => {
 674        return html`
 675          <div>
 676            <label for="${name}">${label}</label>
 677            <input type="checkbox" id="${name}" name="${name}" checked="${value}" onclick=${updateParamsBool} />
 678          </div>
 679        `
 680      };
 681
 682      const userTemplateReset = (e) => {
 683        e.preventDefault();
 684        userTemplateResetToDefaultAndApply()
 685      }
 686
 687      const UserTemplateResetButton = () => {
 688        if (selectedUserTemplate.value.name == 'default') {
 689          return html`
 690            <button disabled>Using default template</button>
 691          `
 692        }
 693
 694        return html`
 695          <button onclick=${userTemplateReset}>Reset all to default</button>
 696        `
 697      };
 698
 699      useEffect(() => {
 700        // autosave template on every change
 701        userTemplateAutosave()
 702      }, [session.value, params.value])
 703
 704      const GrammarControl = () => (
 705        html`
 706          <div>
 707            <label for="template">Grammar</label>
 708            <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
 709            <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
 710            <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
 711          </div>
 712          `
 713      );
 714
 715      const PromptControlFieldSet = () => (
 716        html`
 717        <fieldset>
 718          <div>
 719            <label htmlFor="prompt">Prompt</label>
 720            <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
 721          </div>
 722        </fieldset>
 723        `
 724      );
 725
 726      const ChatConfigForm = () => (
 727        html`
 728          ${PromptControlFieldSet()}
 729
 730          <fieldset class="two">
 731            <div>
 732              <label for="user">User name</label>
 733              <input type="text" name="user" value="${session.value.user}" oninput=${updateSession} />
 734            </div>
 735
 736            <div>
 737              <label for="bot">Bot name</label>
 738              <input type="text" name="char" value="${session.value.char}" oninput=${updateSession} />
 739            </div>
 740          </fieldset>
 741
 742          <fieldset>
 743            <div>
 744              <label for="template">Prompt template</label>
 745              <textarea id="template" name="template" value="${session.value.template}" rows=4 oninput=${updateSession}/>
 746            </div>
 747
 748            <div>
 749              <label for="template">Chat history template</label>
 750              <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
 751            </div>
 752            ${GrammarControl()}
 753          </fieldset>
 754      `
 755      );
 756
 757      const CompletionConfigForm = () => (
 758        html`
 759          ${PromptControlFieldSet()}
 760          <fieldset>${GrammarControl()}</fieldset>
 761        `
 762      );
 763
 764      return html`
 765        <form>
 766          <fieldset class="two">
 767            <${UserTemplateResetButton}/>
 768            <div>
 769              <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
 770              <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
 771            </div>
 772          </fieldset>
 773
 774          ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
 775
 776          <fieldset class="two">
 777            ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
 778            ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
 779            ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
 780            ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
 781            ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
 782            ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
 783            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
 784          </fieldset>
 785          <details>
 786            <summary>More options</summary>
 787            <fieldset class="two">
 788              ${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
 789              ${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
 790              ${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
 791            </fieldset>
 792            <hr />
 793            <fieldset class="three">
 794              <div>
 795                <label><input type="radio" name="mirostat" value="0" checked=${params.value.mirostat == 0} oninput=${updateParamsInt} /> no Mirostat</label>
 796                <label><input type="radio" name="mirostat" value="1" checked=${params.value.mirostat == 1} oninput=${updateParamsInt} /> Mirostat v1</label>
 797                <label><input type="radio" name="mirostat" value="2" checked=${params.value.mirostat == 2} oninput=${updateParamsInt} /> Mirostat v2</label>
 798              </div>
 799              ${FloatField({ label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau })}
 800              ${FloatField({ label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta })}
 801            </fieldset>
 802            <fieldset>
 803              ${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
 804            </fieldset>
 805            <fieldset>
 806              ${IntField({ label: "Min Probabilities from each Sampler", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
 807            </fieldset>
 808            <fieldset>
 809              <label for="api_key">API Key</label>
 810              <input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
 811            </fieldset>
 812          </details>
 813        </form>
 814      `
 815    }
 816
 817    const probColor = (p) => {
 818      const r = Math.floor(192 * (1 - p));
 819      const g = Math.floor(192 * p);
 820      return `rgba(${r},${g},0,0.3)`;
 821    }
 822
 823    const Probabilities = (params) => {
 824      return params.data.map(msg => {
 825        const { completion_probabilities } = msg;
 826        if (
 827          !completion_probabilities ||
 828          completion_probabilities.length === 0
 829        ) return msg.content
 830
 831        if (completion_probabilities.length > 1) {
 832          // Not for byte pair
 833          if (completion_probabilities[0].content.startsWith('byte: \\')) return msg.content
 834
 835          const splitData = completion_probabilities.map(prob => ({
 836            content: prob.content,
 837            completion_probabilities: [prob]
 838          }))
 839          return html`<${Probabilities} data=${splitData} />`
 840        }
 841
 842        const { probs, content } = completion_probabilities[0]
 843        const found = probs.find(p => p.tok_str === msg.content)
 844        const pColor = found ? probColor(found.prob) : 'transparent'
 845
 846        const popoverChildren = html`
 847          <div class="prob-set">
 848            ${probs.map((p, index) => {
 849          return html`
 850                <div
 851                  key=${index}
 852                  title=${`prob: ${p.prob}`}
 853                  style=${{
 854              padding: '0.3em',
 855              backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
 856            }}
 857                >
 858                  <span>${p.tok_str}: </span>
 859                  <span>${Math.floor(p.prob * 100)}%</span>
 860                </div>
 861              `
 862        })}
 863          </div>
 864        `
 865
 866        return html`
 867          <${Popover} style=${{ backgroundColor: pColor }} popoverChildren=${popoverChildren}>
 868            ${msg.content.match(/\n/gim) ? html`<br />` : msg.content}
 869          </>
 870        `
 871      });
 872    }
 873
 874    // poor mans markdown replacement
 875    const Markdownish = (params) => {
 876      const md = params.text
 877        .replace(/&/g, '&amp;')
 878        .replace(/</g, '&lt;')
 879        .replace(/>/g, '&gt;')
 880        .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
 881        .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
 882        .replace(/__(.*?)__/g, '<strong>$1</strong>')
 883        .replace(/\*(.*?)\*/g, '<em>$1</em>')
 884        .replace(/_(.*?)_/g, '<em>$1</em>')
 885        .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
 886        .replace(/`(.*?)`/g, '<code>$1</code>')
 887        .replace(/\n/gim, '<br />');
 888      return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
 889    };
 890
 891    const ModelGenerationInfo = (params) => {
 892      if (!llamaStats.value) {
 893        return html`<span/>`
 894      }
 895      return html`
 896        <span>
 897          ${llamaStats.value.tokens_predicted} predicted, ${llamaStats.value.tokens_cached} cached, ${llamaStats.value.timings.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.timings.predicted_per_second.toFixed(2)} tokens per second
 898        </span>
 899      `
 900    }
 901
 902    // simple popover impl
 903    const Popover = (props) => {
 904      const isOpen = useSignal(false);
 905      const position = useSignal({ top: '0px', left: '0px' });
 906      const buttonRef = useRef(null);
 907      const popoverRef = useRef(null);
 908
 909      const togglePopover = () => {
 910        if (buttonRef.current) {
 911          const rect = buttonRef.current.getBoundingClientRect();
 912          position.value = {
 913            top: `${rect.bottom + window.scrollY}px`,
 914            left: `${rect.left + window.scrollX}px`,
 915          };
 916        }
 917        isOpen.value = !isOpen.value;
 918      };
 919
 920      const handleClickOutside = (event) => {
 921        if (popoverRef.current && !popoverRef.current.contains(event.target) && !buttonRef.current.contains(event.target)) {
 922          isOpen.value = false;
 923        }
 924      };
 925
 926      useEffect(() => {
 927        document.addEventListener('mousedown', handleClickOutside);
 928        return () => {
 929          document.removeEventListener('mousedown', handleClickOutside);
 930        };
 931      }, []);
 932
 933      return html`
 934        <span style=${props.style} ref=${buttonRef} onClick=${togglePopover}>${props.children}</span>
 935        ${isOpen.value && html`
 936          <${Portal} into="#portal">
 937            <div
 938              ref=${popoverRef}
 939              class="popover-content"
 940              style=${{
 941            top: position.value.top,
 942            left: position.value.left,
 943          }}
 944            >
 945              ${props.popoverChildren}
 946            </div>
 947          </${Portal}>
 948        `}
 949      `;
 950    };
 951
 952    // Source: preact-portal (https://github.com/developit/preact-portal/blob/master/src/preact-portal.js)
 953    /** Redirect rendering of descendants into the given CSS selector */
 954    class Portal extends Component {
 955      componentDidUpdate(props) {
 956        for (let i in props) {
 957          if (props[i] !== this.props[i]) {
 958            return setTimeout(this.renderLayer);
 959          }
 960        }
 961      }
 962
 963      componentDidMount() {
 964        this.isMounted = true;
 965        this.renderLayer = this.renderLayer.bind(this);
 966        this.renderLayer();
 967      }
 968
 969      componentWillUnmount() {
 970        this.renderLayer(false);
 971        this.isMounted = false;
 972        if (this.remote && this.remote.parentNode) this.remote.parentNode.removeChild(this.remote);
 973      }
 974
 975      findNode(node) {
 976        return typeof node === 'string' ? document.querySelector(node) : node;
 977      }
 978
 979      renderLayer(show = true) {
 980        if (!this.isMounted) return;
 981
 982        // clean up old node if moving bases:
 983        if (this.props.into !== this.intoPointer) {
 984          this.intoPointer = this.props.into;
 985          if (this.into && this.remote) {
 986            this.remote = render(html`<${PortalProxy} />`, this.into, this.remote);
 987          }
 988          this.into = this.findNode(this.props.into);
 989        }
 990
 991        this.remote = render(html`
 992          <${PortalProxy} context=${this.context}>
 993            ${show && this.props.children || null}
 994          </${PortalProxy}>
 995        `, this.into, this.remote);
 996      }
 997
 998      render() {
 999        return null;
1000      }
1001    }
1002    // high-order component that renders its first child if it exists.
1003    // used as a conditional rendering proxy.
1004    class PortalProxy extends Component {
1005      getChildContext() {
1006        return this.props.context;
1007      }
1008      render({ children }) {
1009        return children || null;
1010      }
1011    }
1012
1013    function App(props) {
1014      useEffect(() => {
1015        const query = new URLSearchParams(location.search).get("q");
1016        if (query) chat(query);
1017      }, []);
1018
1019      return html`
1020        <div class="mode-${session.value.type}">
1021          <header>
1022            <h1>llama.cpp</h1>
1023          </header>
1024
1025          <section id="write">
1026            <${session.value.type === 'chat' ? MessageInput : CompletionControls} />
1027          </section>
1028
1029          <main id="content">
1030            <${chatStarted.value ? ChatLog : ConfigForm} />
1031          </main>
1032
1033          <footer>
1034            <p><${ModelGenerationInfo} /></p>
1035            <p>Powered by <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
1036          </footer>
1037        </div>
1038      `;
1039    }
1040
1041    render(h(App), document.querySelector('#container'));
1042  </script>
1043</head>
1044
1045<body>
1046  <div id="container">
1047    <input type="file" id="fileInput" accept="image/*" style="display: none;">
1048  </div>
1049  <div id="portal"></div>
1050</body>
1051
1052</html>