archive llama.cpp-b8008.tar.gz
corpus lotr.txt map1_bromm.txt map1_dagna.txt map1_keldor.txt map1_skara.txt map1_thrain.txt
llama.cpp
.devops
nix apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix
cann.Dockerfile cpu.Dockerfile cuda-new.Dockerfile cuda.Dockerfile intel.Dockerfile llama-cli-cann.Dockerfile llama-cpp-cuda.srpm.spec llama-cpp.srpm.spec musa.Dockerfile rocm.Dockerfile s390x.Dockerfile tools.sh vulkan.Dockerfile
.gemini settings.json
.github
ISSUE_TEMPLATE 010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.yml
actions
get-tag-name action.yml
install-exe action.yml
linux-setup-spacemit action.yml
linux-setup-vulkan action.yml
unarchive-tar action.yml
windows-setup-cuda action.yml
windows-setup-rocm action.yml
workflows bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.yml
labeler.yml pull_request_template.md
benches
dgx-spark aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json dgx-spark.md
mac-m2-ultra mac-m2-ultra.md
ci README-MUSA.md README.md run.sh
cmake arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmake
common
jinja README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.h
CMakeLists.txt arg.cpp arg.h base64.hpp build-info.cpp.in chat-parser-xml-toolcall.cpp chat-parser-xml-toolcall.h chat-parser.cpp chat-parser.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp chat.h common.cpp common.h console.cpp console.h debug.cpp debug.h download.cpp download.h http.h json-partial.cpp json-partial.h json-schema-to-grammar.cpp json-schema-to-grammar.h llguidance.cpp log.cpp log.h ngram-cache.cpp ngram-cache.h ngram-map.cpp ngram-map.h ngram-mod.cpp ngram-mod.h peg-parser.cpp peg-parser.h preset.cpp preset.h regex-partial.cpp regex-partial.h sampling.cpp sampling.h speculative.cpp speculative.h unicode.cpp unicode.h
docs
android imported-into-android-studio.jpg
backend
VirtGPU configuration.md development.md
snapdragon CMakeUserPresets.json README.md developer.md windows.md
BLIS.md CANN.md CUDA-FEDORA.md OPENCL.md SYCL.md VirtGPU.md ZenDNN.md zDNN.md
development
llama-star idea-arch.key idea-arch.pdf
HOWTO-add-model.md debugging-tests.md parsing.md token_generation_performance_tips.md
multimodal MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.md
ops BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csv
android.md build-riscv64-spacemit.md build-s390x.md build.md docker.md function-calling.md install.md llguidance.md multimodal.md ops.md preset.md speculative.md
examples
batched CMakeLists.txt README.md batched.cpp
batched.swift
Sources main.swift
.gitignore Makefile Package.swift README.md
convert-llama2c-to-ggml CMakeLists.txt README.md convert-llama2c-to-ggml.cpp
debug CMakeLists.txt README.md debug.cpp
deprecation-warning README.md deprecation-warning.cpp
diffusion CMakeLists.txt README.md diffusion-cli.cpp
embedding CMakeLists.txt README.md embedding.cpp
eval-callback CMakeLists.txt README.md eval-callback.cpp
gen-docs CMakeLists.txt gen-docs.cpp
gguf CMakeLists.txt gguf.cpp
gguf-hash
deps
rotate-bits package.json rotate-bits.h
sha1 package.json sha1.c sha1.h
sha256 package.json sha256.c sha256.h
xxhash clib.json xxhash.c xxhash.h
CMakeLists.txt README.md gguf-hash.cpp
idle CMakeLists.txt README.md idle.cpp
llama.android
app
src
main
java
com
example
llama MainActivity.kt MessageAdapter.kt
res
drawable bg_assistant_message.xml bg_user_message.xml ic_launcher_background.xml ic_launcher_foreground.xml outline_folder_open_24.xml outline_send_24.xml
layout activity_main.xml item_message_assistant.xml item_message_user.xml
mipmap-anydpi ic_launcher.xml ic_launcher_round.xml
mipmap-hdpi ic_launcher.webp ic_launcher_round.webp
mipmap-mdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxxhdpi ic_launcher.webp ic_launcher_round.webp
values colors.xml strings.xml themes.xml
xml backup_rules.xml data_extraction_rules.xml
AndroidManifest.xml
.gitignore build.gradle.kts proguard-rules.pro
gradle
wrapper gradle-wrapper.jar gradle-wrapper.properties
libs.versions.toml
lib
src
androidTest
java
android
llama
cpp ExampleInstrumentedTest.kt
main
cpp CMakeLists.txt ai_chat.cpp logging.h
java
com
arm
aichat
gguf FileType.kt GgufMetadata.kt GgufMetadataReader.kt
internal
gguf GgufMetadataReaderImpl.kt
InferenceEngineImpl.kt
AiChat.kt InferenceEngine.kt
AndroidManifest.xml
test
java
android
llama
cpp ExampleUnitTest.kt
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.pro
.gitignore build.gradle.kts gradle.properties gradlew settings.gradle.kts
llama.swiftui
llama.cpp.swift LibLlama.swift
llama.swiftui
Assets.xcassets
AppIcon.appiconset Contents.json
Contents.json
Models LlamaState.swift
Resources
models .gitignore
UI ContentView.swift DownloadButton.swift InputButton.swift LoadCustomButton.swift
llama_swiftuiApp.swift
llama.swiftui.xcodeproj
project.xcworkspace contents.xcworkspacedata
project.pbxproj
.gitignore README.md
lookahead CMakeLists.txt README.md lookahead.cpp
lookup CMakeLists.txt README.md lookup-create.cpp lookup-merge.cpp lookup-stats.cpp lookup.cpp
model-conversion
scripts
causal compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.py
embedding compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.py
utils __init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.py
.gitignore Makefile README.md requirements.txt
parallel CMakeLists.txt README.md parallel.cpp
passkey CMakeLists.txt README.md passkey.cpp
retrieval CMakeLists.txt README.md retrieval.cpp
save-load-state CMakeLists.txt save-load-state.cpp
simple CMakeLists.txt README.md simple.cpp
simple-chat CMakeLists.txt README.md simple-chat.cpp
simple-cmake-pkg .gitignore CMakeLists.txt README.md
speculative CMakeLists.txt README.md speculative.cpp
speculative-simple CMakeLists.txt README.md speculative-simple.cpp
sycl CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.bat
training CMakeLists.txt README.md finetune.cpp
CMakeLists.txt convert_legacy_llama.py json_schema_pydantic_example.py json_schema_to_grammar.py llama.vim pydantic_models_to_grammar.py pydantic_models_to_grammar_examples.py reason-act.sh regex_to_grammar.py server-llama2-13B.sh server_embd.py ts-type-to-grammar.sh
ggml
cmake GitVars.cmake common.cmake ggml-config.cmake.in
include ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.h
src
ggml-blas CMakeLists.txt ggml-blas.cpp
ggml-cann CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cpp
ggml-cpu
amx amx.cpp amx.h common.h mmq.cpp mmq.h
arch
arm cpu-feats.cpp quants.c repack.cpp
loongarch quants.c
powerpc cpu-feats.cpp quants.c
riscv cpu-feats.cpp quants.c repack.cpp
s390 cpu-feats.cpp quants.c
wasm quants.c
x86 cpu-feats.cpp quants.c repack.cpp
cmake FindSIMD.cmake
kleidiai kernels.cpp kernels.h kleidiai.cpp kleidiai.h
llamafile sgemm-ppc.h sgemm.cpp sgemm.h
spacemit ime.cpp ime.h ime1_kernels.cpp ime_kernels.h
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.h
ggml-cuda
template-instances fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu 
fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cu
vendors cuda.h hip.h musa.h
CMakeLists.txt acc.cu acc.cuh add-id.cu add-id.cuh arange.cu arange.cuh argmax.cu argmax.cuh argsort.cu argsort.cuh binbcast.cu binbcast.cuh clamp.cu clamp.cuh common.cuh concat.cu concat.cuh conv-transpose-1d.cu conv-transpose-1d.cuh conv2d-dw.cu conv2d-dw.cuh conv2d-transpose.cu conv2d-transpose.cuh conv2d.cu conv2d.cuh convert.cu convert.cuh count-equal.cu count-equal.cuh cp-async.cuh cpy-utils.cuh cpy.cu cpy.cuh cross-entropy-loss.cu cross-entropy-loss.cuh cumsum.cu cumsum.cuh dequantize.cuh diag.cu diag.cuh diagmask.cu diagmask.cuh fattn-common.cuh fattn-mma-f16.cuh fattn-tile.cu fattn-tile.cuh fattn-vec.cuh fattn-wmma-f16.cu fattn-wmma-f16.cuh fattn.cu fattn.cuh fill.cu fill.cuh getrows.cu getrows.cuh ggml-cuda.cu gla.cu gla.cuh im2col.cu im2col.cuh mean.cu mean.cuh mma.cuh mmf.cu mmf.cuh mmid.cu mmid.cuh mmq.cu mmq.cuh mmvf.cu mmvf.cuh mmvq.cu mmvq.cuh norm.cu norm.cuh opt-step-adamw.cu opt-step-adamw.cuh opt-step-sgd.cu opt-step-sgd.cuh out-prod.cu out-prod.cuh pad.cu pad.cuh pad_reflect_1d.cu pad_reflect_1d.cuh pool2d.cu pool2d.cuh quantize.cu quantize.cuh reduce_rows.cuh roll.cu roll.cuh rope.cu rope.cuh scale.cu scale.cuh set-rows.cu set-rows.cuh set.cu set.cuh softcap.cu softcap.cuh softmax.cu softmax.cuh solve_tri.cu solve_tri.cuh ssm-conv.cu ssm-conv.cuh ssm-scan.cu ssm-scan.cuh sum.cu sum.cuh sumrows.cu sumrows.cuh top-k.cu top-k.cuh topk-moe.cu topk-moe.cuh tri.cu tri.cuh tsembd.cu tsembd.cuh unary.cu unary.cuh upscale.cu upscale.cuh vecdotq.cuh wkv.cu wkv.cuh
ggml-hexagon
htp CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.h
CMakeLists.txt ggml-hexagon.cpp htp-drv.cpp htp-drv.h libdl.h libggml-htp.inf op-desc.h
ggml-hip CMakeLists.txt
ggml-metal CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metal
ggml-musa CMakeLists.txt mudnn.cu mudnn.cuh
ggml-opencl
kernels add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.cl
CMakeLists.txt ggml-opencl.cpp
ggml-rpc CMakeLists.txt ggml-rpc.cpp
ggml-sycl
dpct helper.hpp
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hpp
ggml-virtgpu
backend
shared api_remoting.h apir_backend.gen.h apir_backend.h apir_cs.h apir_cs_ggml.h apir_cs_rpc.h
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cpp
include apir_hw.h
CMakeLists.txt apir_cs_ggml-rpc-front.cpp ggml-backend-buffer-type.cpp ggml-backend-buffer.cpp ggml-backend-device.cpp ggml-backend-reg.cpp ggml-backend.cpp ggml-remoting.h ggmlremoting_functions.yaml regenerate_remoting.py virtgpu-apir.h virtgpu-forward-backend.cpp virtgpu-forward-buffer-type.cpp virtgpu-forward-buffer.cpp virtgpu-forward-device.cpp virtgpu-forward-impl.h virtgpu-forward.gen.h virtgpu-shm.cpp virtgpu-shm.h virtgpu-utils.cpp virtgpu-utils.h virtgpu.cpp virtgpu.h
ggml-vulkan
cmake host-toolchain.cmake.in
vulkan-shaders
feature-tests bfloat16.comp coopmat.comp coopmat2.comp integer_dot.comp
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp 
opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.comp
CMakeLists.txt ggml-vulkan.cpp
ggml-webgpu
wgsl-shaders argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgsl
CMakeLists.txt ggml-webgpu-shader-lib.hpp ggml-webgpu.cpp pre_wgsl.hpp
ggml-zdnn .gitignore CMakeLists.txt common.hpp ggml-zdnn.cpp mmf.cpp mmf.hpp utils.cpp utils.hpp
ggml-zendnn CMakeLists.txt ggml-zendnn.cpp
CMakeLists.txt ggml-alloc.c ggml-backend-dl.cpp ggml-backend-dl.h ggml-backend-impl.h ggml-backend-reg.cpp ggml-backend.cpp ggml-common.h ggml-impl.h ggml-opt.cpp ggml-quants.c ggml-quants.h ggml-threading.cpp ggml-threading.h ggml.c ggml.cpp gguf.cpp
.gitignore CMakeLists.txt
gguf-py
examples reader.py writer.py
gguf
scripts gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.py
__init__.py constants.py gguf.py gguf_reader.py gguf_writer.py lazy.py metadata.py py.typed quants.py tensor_mapping.py utility.py vocab.py
tests __init__.py test_metadata.py test_quants.py
LICENSE README.md pyproject.toml
grammars README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnf
include llama-cpp.h llama.h
licenses LICENSE-jsonhpp
media llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svg
models
templates Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinja
.editorconfig ggml-vocab-aquila.gguf ggml-vocab-baichuan.gguf ggml-vocab-bert-bge.gguf ggml-vocab-bert-bge.gguf.inp ggml-vocab-bert-bge.gguf.out ggml-vocab-command-r.gguf ggml-vocab-command-r.gguf.inp ggml-vocab-command-r.gguf.out ggml-vocab-deepseek-coder.gguf ggml-vocab-deepseek-coder.gguf.inp ggml-vocab-deepseek-coder.gguf.out ggml-vocab-deepseek-llm.gguf ggml-vocab-deepseek-llm.gguf.inp ggml-vocab-deepseek-llm.gguf.out ggml-vocab-falcon.gguf ggml-vocab-falcon.gguf.inp ggml-vocab-falcon.gguf.out ggml-vocab-gpt-2.gguf ggml-vocab-gpt-2.gguf.inp ggml-vocab-gpt-2.gguf.out ggml-vocab-gpt-neox.gguf ggml-vocab-llama-bpe.gguf ggml-vocab-llama-bpe.gguf.inp ggml-vocab-llama-bpe.gguf.out ggml-vocab-llama-spm.gguf ggml-vocab-llama-spm.gguf.inp ggml-vocab-llama-spm.gguf.out ggml-vocab-mpt.gguf ggml-vocab-mpt.gguf.inp ggml-vocab-mpt.gguf.out ggml-vocab-nomic-bert-moe.gguf ggml-vocab-phi-3.gguf ggml-vocab-phi-3.gguf.inp ggml-vocab-phi-3.gguf.out ggml-vocab-qwen2.gguf ggml-vocab-qwen2.gguf.inp ggml-vocab-qwen2.gguf.out ggml-vocab-refact.gguf ggml-vocab-refact.gguf.inp ggml-vocab-refact.gguf.out ggml-vocab-starcoder.gguf ggml-vocab-starcoder.gguf.inp ggml-vocab-starcoder.gguf.out
pocs
vdot CMakeLists.txt q8dot.cpp vdot.cpp
CMakeLists.txt
requirements requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txt
scripts
apple validate-apps.sh validate-ios.sh validate-macos.sh validate-tvos.sh validate-visionos.sh
jinja jinja-tester.py requirements.txt
snapdragon
adb llama-cli.farf run-bench.sh run-cli.sh run-completion.sh run-mtmd.sh run-tool.sh
qdc
tests test_bench.py
readme.md requirements.txt
windows run-bench.ps1 run-cli.ps1 run-tool.ps1 setup-build.ps1
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmake
src
models afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpp
CMakeLists.txt llama-adapter.cpp llama-adapter.h llama-arch.cpp llama-arch.h llama-batch.cpp llama-batch.h llama-chat.cpp llama-chat.h llama-context.cpp llama-context.h llama-cparams.cpp llama-cparams.h llama-grammar.cpp llama-grammar.h llama-graph.cpp llama-graph.h llama-hparams.cpp llama-hparams.h llama-impl.cpp llama-impl.h llama-io.cpp llama-io.h llama-kv-cache-iswa.cpp llama-kv-cache-iswa.h llama-kv-cache.cpp llama-kv-cache.h llama-kv-cells.h llama-memory-hybrid-iswa.cpp llama-memory-hybrid-iswa.h llama-memory-hybrid.cpp llama-memory-hybrid.h llama-memory-recurrent.cpp llama-memory-recurrent.h llama-memory.cpp llama-memory.h llama-mmap.cpp llama-mmap.h llama-model-loader.cpp llama-model-loader.h llama-model-saver.cpp llama-model-saver.h llama-model.cpp llama-model.h llama-quant.cpp llama-quant.h llama-sampler.cpp llama-sampler.h llama-vocab.cpp llama-vocab.h llama.cpp unicode-data.cpp unicode-data.h unicode.cpp unicode.h
tests
peg-parser simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.h
.gitignore CMakeLists.txt get-model.cpp get-model.h run-json-schema-to-grammar.mjs test-alloc.cpp test-arg-parser.cpp test-autorelease.cpp test-backend-ops.cpp test-backend-sampler.cpp test-barrier.cpp test-c.c test-chat-parser.cpp test-chat-peg-parser.cpp test-chat-template.cpp test-chat.cpp test-double-float.cpp test-gbnf-validator.cpp test-gguf.cpp test-grammar-integration.cpp test-grammar-llguidance.cpp test-grammar-parser.cpp test-jinja.cpp test-json-partial.cpp test-json-schema-to-grammar.cpp test-llama-grammar.cpp test-log.cpp test-lora-conversion-inference.sh test-model-load-cancel.cpp test-mtmd-c-api.c test-opt.cpp test-peg-parser.cpp test-quantize-fns.cpp test-quantize-perf.cpp test-quantize-stats.cpp test-regex-partial.cpp test-rope.cpp test-sampling.cpp test-state-restore-fragmented.cpp test-thread-safety.cpp test-tokenizer-0.cpp test-tokenizer-0.py test-tokenizer-0.sh test-tokenizer-1-bpe.cpp test-tokenizer-1-spm.cpp test-tokenizer-random.py test-tokenizers-repo.sh testing.h
tools
batched-bench CMakeLists.txt README.md batched-bench.cpp
cli CMakeLists.txt README.md cli.cpp
completion CMakeLists.txt README.md completion.cpp
cvector-generator CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txt
export-lora CMakeLists.txt README.md export-lora.cpp
fit-params CMakeLists.txt README.md fit-params.cpp
gguf-split CMakeLists.txt README.md gguf-split.cpp tests.sh
imatrix CMakeLists.txt README.md imatrix.cpp
llama-bench CMakeLists.txt README.md llama-bench.cpp
mtmd
legacy-models convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.py
models cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cpp
CMakeLists.txt README.md clip-graph.h clip-impl.h clip-model.h clip.cpp clip.h deprecation-warning.cpp mtmd-audio.cpp mtmd-audio.h mtmd-cli.cpp mtmd-helper.cpp mtmd-helper.h mtmd.cpp mtmd.h requirements.txt test-1.jpeg test-2.mp3 tests.sh
perplexity CMakeLists.txt README.md perplexity.cpp
quantize CMakeLists.txt README.md quantize.cpp tests.sh
rpc CMakeLists.txt README.md rpc-server.cpp
server
bench README.md bench.py prometheus.yml requirements.txt script.js
public index.html.gz loading.html
public_legacy colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.css
public_simplechat datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjs
tests
unit test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.py
.gitignore README.md conftest.py pytest.ini requirements.txt tests.sh utils.py
themes
buttons-top README.md buttons_top.png favicon.ico index.html
wild README.md favicon.ico index.html llama_cpp.png llamapattern.png wild.png
README.md
webui
.storybook ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.ts
docs
architecture high-level-architecture-simplified.md high-level-architecture.md
flows chat-flow.md conversations-flow.md data-flow-simplified-model-mode.md data-flow-simplified-router-mode.md database-flow.md models-flow.md server-flow.md settings-flow.md
scripts dev.sh install-git-hooks.sh post-build.sh
src
lib
components
app
chat
ChatAttachments ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelte
ChatForm
ChatFormActions ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelte
ChatForm.svelte ChatFormFileInputInvisible.svelte ChatFormHelperText.svelte ChatFormTextarea.svelte
ChatMessages ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelte
ChatScreen ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.svelte
ChatSettings ChatSettings.svelte ChatSettingsFields.svelte ChatSettingsFooter.svelte ChatSettingsImportExportTab.svelte ChatSettingsParameterSourceIndicator.svelte
ChatSidebar ChatSidebar.svelte ChatSidebarActions.svelte ChatSidebarConversationItem.svelte ChatSidebarSearch.svelte handle-mobile-sidebar-item-click.ts
dialogs DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.svelte
misc ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelte
models ModelBadge.svelte ModelsSelector.svelte
server ServerErrorSplash.svelte ServerLoadingSplash.svelte ServerStatus.svelte
index.ts
ui
alert alert-description.svelte alert-title.svelte alert.svelte index.ts
alert-dialog alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.ts
badge badge.svelte index.ts
button button.svelte index.ts
card card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.ts
checkbox checkbox.svelte index.ts
collapsible collapsible-content.svelte collapsible-trigger.svelte collapsible.svelte index.ts
dialog dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.ts
dropdown-menu dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.ts
input index.ts input.svelte
label index.ts label.svelte
popover index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelte
scroll-area index.ts scroll-area-scrollbar.svelte scroll-area.svelte
select index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.svelte
separator index.ts separator.svelte
sheet index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.svelte
sidebar constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.svelte
skeleton index.ts skeleton.svelte
switch index.ts switch.svelte
table index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelte
textarea index.ts textarea.svelte
tooltip index.ts tooltip-content.svelte tooltip-trigger.svelte
utils.ts
constants auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.ts
enums attachment.ts chat.ts files.ts index.ts model.ts server.ts
hooks is-mobile.svelte.ts use-model-change-validation.svelte.ts use-processing-state.svelte.ts
markdown enhance-code-blocks.ts enhance-links.ts literal-html.ts table-html-restorer.ts
services chat.ts database.ts index.ts models.ts parameter-sync.spec.ts parameter-sync.ts props.ts
stores chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.ts
types api.d.ts chat.d.ts database.d.ts index.ts models.d.ts settings.d.ts
utils api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.ts
routes
chat
[id] +page.svelte +page.ts
+error.svelte +layout.svelte +page.svelte +page.ts
styles katex-custom.scss
app.css app.d.ts app.html
static favicon.svg loading.html
tests
client
components TestWrapper.svelte
page.svelte.test.ts
e2e demo.test.ts
stories
fixtures
assets 1.jpg beautiful-flowers-lotus.webp example.pdf hf-logo.svg
ai-tutorial.ts api-docs.ts blog-post.ts data-analysis.ts empty.ts math-formulas.ts readme.ts storybook-mocks.ts
ChatForm.stories.svelte ChatMessage.stories.svelte ChatSettings.stories.svelte ChatSidebar.stories.svelte Introduction.mdx MarkdownContent.stories.svelte
unit clipboard.test.ts latex-protection.test.ts model-names.test.ts
.gitignore .npmrc .prettierignore .prettierrc README.md components.json eslint.config.js package-lock.json package.json playwright.config.ts svelte.config.js tsconfig.json vite.config.ts vitest-setup-client.ts
CMakeLists.txt README-dev.md README.md chat-llama2.sh chat.mjs chat.sh server-common.cpp server-common.h server-context.cpp server-context.h server-http.cpp server-http.h server-models.cpp server-models.h server-queue.cpp server-queue.h server-task.cpp server-task.h server.cpp
tokenize CMakeLists.txt tokenize.cpp
tts CMakeLists.txt README.md convert_pt_to_hf.py tts-outetts.py tts.cpp
CMakeLists.txt
vendor
cpp-httplib CMakeLists.txt LICENSE httplib.cpp httplib.h
miniaudio miniaudio.h
nlohmann json.hpp json_fwd.hpp
sheredom subprocess.h
stb stb_image.h
.clang-format .clang-tidy .dockerignore .ecrc .editorconfig .flake8 .gitignore .gitmodules .pre-commit-config.yaml AGENTS.md AUTHORS CLAUDE.md CMakeLists.txt CMakePresets.json CODEOWNERS CONTRIBUTING.md LICENSE Makefile README.md SECURITY.md convert_hf_to_gguf.py convert_hf_to_gguf_update.py convert_llama_ggml_to_gguf.py convert_lora_to_gguf.py flake.lock flake.nix mypy.ini poetry.lock pyproject.toml pyrightconfig.json requirements.txt
maps map1.h map1.txt
papers 2310.11703v2.pdf 2405.14159v2.pdf
prompts lotr.h lotr.txt
.gitignore Dockerfile Makefile README.md compile_flags.txt context.c game.c makext.mk mapeditor.html maps.h minunit.h models.h models.txt nonstd.h npc.c termbox2.h vectordb.c vectordb.h
llama.cpp/tools/server/themes/wild/index.html raw
   1<html>
   2
   3<head>
   4  <meta charset="UTF-8">
   5  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
   6  <meta name="color-scheme" content="light dark">
   7  <title>llama.cpp - chat</title>
   8
   9  <style>
  10    body {
  11      font-family: system-ui;
  12      font-size: 90%;
  13      background-image: url('llamapattern.png');
  14    }
  15
  16    #container {
  17      margin: 0em auto;
  18      display: flex;
  19      flex-direction: column;
  20      justify-content: space-between;
  21      height: 100%;
  22    }
  23
  24    main {
  25      margin: 3px;
  26      display: flex;
  27      flex-direction: column;
  28      justify-content: space-between;
  29      gap: 1em;
  30
  31      flex-grow: 1;
  32      overflow-y: auto;
  33
  34      border: 1px solid #ccc;
  35      border-radius: 5px;
  36      padding: 0.5em;
  37
  38      background-color: rgba(255,255,255,0.9);
  39    }
  40
  41    body {
  42      max-width: 600px;
  43      min-width: 300px;
  44      line-height: 1.2;
  45      margin: 0 auto;
  46      padding: 0 0.5em;
  47    }
  48
  49    p {
  50      overflow-wrap: break-word;
  51      word-wrap: break-word;
  52      hyphens: auto;
  53      margin-top: 0.5em;
  54      margin-bottom: 0.5em;
  55    }
  56
  57    #write form {
  58      margin: 1em 0 0 0;
  59      display: flex;
  60      flex-direction: column;
  61      gap: 0.5em;
  62      align-items: stretch;
  63    }
  64
  65    .right {
  66      display: flex;
  67      flex-direction: row;
  68      gap: 0.5em;
  69      justify-content: flex-end;
  70    }
  71
  72    fieldset {
  73      border: none;
  74      padding: 0;
  75      margin: 0;
  76    }
  77
  78    fieldset.two {
  79      display: grid;
  80      grid-template: "a a";
  81      gap: 1em;
  82    }
  83
  84    fieldset.three {
  85      display: grid;
  86      grid-template: "a a a";
  87      gap: 1em;
  88    }
  89
  90    details {
  91      border: 1px solid #aaa;
  92      border-radius: 4px;
  93      padding: 0.5em 0.5em 0;
  94      margin-top: 0.5em;
  95    }
  96
  97    summary {
  98      font-weight: bold;
  99      margin: -0.5em -0.5em 0;
 100      padding: 0.5em;
 101      cursor: pointer;
 102    }
 103
 104    details[open] {
 105      padding: 0.5em;
 106    }
 107
 108    .prob-set {
 109      padding: 0.3em;
 110      border-bottom: 1px solid #ccc;
 111    }
 112
 113    .popover-content {
 114      position: absolute;
 115      background-color: white;
 116      padding: 0.2em;
 117      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
 118    }
 119
 120    textarea {
 121      padding: 5px;
 122      flex-grow: 1;
 123      width: 100%;
 124    }
 125
 126    pre code {
 127      display: block;
 128      background-color: #222;
 129      color: #ddd;
 130    }
 131
 132    code {
 133      font-family: monospace;
 134      padding: 0.1em 0.3em;
 135      border-radius: 3px;
 136    }
 137
 138    fieldset label {
 139      margin: 0.5em 0;
 140      display: block;
 141    }
 142
 143    fieldset label.slim {
 144      margin: 0 0.5em;
 145      display: inline;
 146    }
 147
 148    header,
 149    footer {
 150      text-align: center;
 151    }
 152
 153    footer {
 154      font-size: 80%;
 155      color: #888;
 156    }
 157
 158    .mode-chat textarea[name=prompt] {
 159      height: 4.5em;
 160    }
 161
 162    .mode-completion textarea[name=prompt] {
 163      height: 10em;
 164    }
 165
 166    [contenteditable] {
 167      display: inline-block;
 168      white-space: pre-wrap;
 169      outline: 0px solid transparent;
 170    }
 171
 172    @keyframes loading-bg-wipe {
 173      0% {
 174        background-position: 0%;
 175      }
 176
 177      100% {
 178        background-position: 100%;
 179      }
 180    }
 181
 182    .loading {
 183      --loading-color-1: #eeeeee00;
 184      --loading-color-2: #eeeeeeff;
 185      background-size: 50% 100%;
 186      background-image: linear-gradient(90deg, var(--loading-color-1), var(--loading-color-2), var(--loading-color-1));
 187      animation: loading-bg-wipe 2s linear infinite;
 188    }
 189
 190    @media (prefers-color-scheme: dark) {
 191      .loading {
 192        --loading-color-1: #22222200;
 193        --loading-color-2: #222222ff;
 194      }
 195
 196      .popover-content {
 197        background-color: black;
 198      }
 199    }
 200  </style>
 201
 202  <script type="module">
 203    import {
 204      html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
 205    } from './index.js';
 206
 207    import { llama } from './completion.js';
 208    import { SchemaConverter } from './json-schema-to-grammar.mjs';
 209    let selected_image = false;
 210    var slot_id = -1;
 211
 212    const session = signal({
 213      prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",
 214      template: "{{prompt}}\n\n{{history}}\n{{char}}:",
 215      historyTemplate: "{{name}}: {{message}}",
 216      transcript: [],
 217      type: "chat",  // "chat" | "completion"
 218      char: "Llama",
 219      user: "User",
 220      image_selected: ''
 221    })
 222
 223    const params = signal({
 224      n_predict: 400,
 225      temperature: 0.7,
 226      repeat_last_n: 256, // 0 = disable penalty, -1 = context size
 227      repeat_penalty: 1.18, // 1.0 = disabled
 228      top_k: 40, // <= 0 to use vocab size
 229      top_p: 0.95, // 1.0 = disabled
 230      min_p: 0.05, // 0 = disabled
 231      typical_p: 1.0, // 1.0 = disabled
 232      presence_penalty: 0.0, // 0.0 = disabled
 233      frequency_penalty: 0.0, // 0.0 = disabled
 234      mirostat: 0, // 0/1/2
 235      mirostat_tau: 5, // target entropy
 236      mirostat_eta: 0.1, // learning rate
 237      grammar: '',
 238      n_probs: 0, // no completion_probabilities,
 239      min_keep: 0, // min probs from each sampler,
 240      image_data: [],
 241      cache_prompt: true,
 242      api_key: ''
 243    })
 244
 245    /* START: Support for storing prompt templates and parameters in browsers LocalStorage */
 246
 247    const local_storage_storageKey = "llamacpp_server_local_storage";
 248
 249    function local_storage_setDataFromObject(tag, content) {
 250      localStorage.setItem(local_storage_storageKey + '/' + tag, JSON.stringify(content));
 251    }
 252
 253    function local_storage_setDataFromRawText(tag, content) {
 254      localStorage.setItem(local_storage_storageKey + '/' + tag, content);
 255    }
 256
 257    function local_storage_getDataAsObject(tag) {
 258      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
 259      if (!item) {
 260        return null;
 261      } else {
 262        return JSON.parse(item);
 263      }
 264    }
 265
 266    function local_storage_getDataAsRawText(tag) {
 267      const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
 268      if (!item) {
 269        return null;
 270      } else {
 271        return item;
 272      }
 273    }
 274
 275    // create a container for user templates and settings
 276
 277    const savedUserTemplates = signal({})
 278    const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } })
 279
 280    // let's import locally saved templates and settings if there are any
 281    // user templates and settings are stored in one object
 282    // in form of { "templatename": "templatedata" } and { "settingstemplatename":"settingsdata" }
 283
 284    console.log('Importing saved templates')
 285
 286    let importedTemplates = local_storage_getDataAsObject('user_templates')
 287
 288    if (importedTemplates) {
 289      // saved templates were successfully imported.
 290
 291      console.log('Processing saved templates and updating default template')
 292      params.value = { ...params.value, image_data: [] };
 293
 294      //console.log(importedTemplates);
 295      savedUserTemplates.value = importedTemplates;
 296
 297      //override default template
 298      savedUserTemplates.value.default = { session: session.value, params: params.value }
 299      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
 300    } else {
 301      // no saved templates detected.
 302
 303      console.log('Initializing LocalStorage and saving default template')
 304
 305      savedUserTemplates.value = { "default": { session: session.value, params: params.value } }
 306      local_storage_setDataFromObject('user_templates', savedUserTemplates.value)
 307    }
 308
 309    function userTemplateResetToDefault() {
 310      console.log('Resetting template to default')
 311      selectedUserTemplate.value.name = 'default';
 312      selectedUserTemplate.value.data = savedUserTemplates.value['default'];
 313    }
 314
 315    function userTemplateApply(t) {
 316      session.value = t.data.session;
 317      session.value = { ...session.value, image_selected: '' };
 318      params.value = t.data.params;
 319      params.value = { ...params.value, image_data: [] };
 320    }
 321
 322    function userTemplateResetToDefaultAndApply() {
 323      userTemplateResetToDefault()
 324      userTemplateApply(selectedUserTemplate.value)
 325    }
 326
 327    function userTemplateLoadAndApplyAutosaved() {
 328      // get autosaved last used template
 329      let lastUsedTemplate = local_storage_getDataAsObject('user_templates_last')
 330
 331      if (lastUsedTemplate) {
 332
 333        console.log('Autosaved template found, restoring')
 334
 335        selectedUserTemplate.value = lastUsedTemplate
 336      }
 337      else {
 338
 339        console.log('No autosaved template found, using default template')
 340        // no autosaved last used template was found, so load from default.
 341
 342        userTemplateResetToDefault()
 343      }
 344
 345      console.log('Applying template')
 346      // and update internal data from templates
 347
 348      userTemplateApply(selectedUserTemplate.value)
 349    }
 350
 351    //console.log(savedUserTemplates.value)
 352    //console.log(selectedUserTemplate.value)
 353
 354    function userTemplateAutosave() {
 355      console.log('Template Autosave...')
 356      if (selectedUserTemplate.value.name == 'default') {
 357        // we don't want to save over default template, so let's create a new one
 358        let newTemplateName = 'UserTemplate-' + Date.now().toString()
 359        let newTemplate = { 'name': newTemplateName, 'data': { 'session': session.value, 'params': params.value } }
 360
 361        console.log('Saving as ' + newTemplateName)
 362
 363        // save in the autosave slot
 364        local_storage_setDataFromObject('user_templates_last', newTemplate)
 365
 366        // and load it back and apply
 367        userTemplateLoadAndApplyAutosaved()
 368      } else {
 369        local_storage_setDataFromObject('user_templates_last', { 'name': selectedUserTemplate.value.name, 'data': { 'session': session.value, 'params': params.value } })
 370      }
 371    }
 372
 373    console.log('Checking for autosaved last used template')
 374    userTemplateLoadAndApplyAutosaved()
 375
 376    /* END: Support for storing prompt templates and parameters in browsers LocalStorage */
 377
 378    const llamaStats = signal(null)
 379    const controller = signal(null)
 380
 381    // currently generating a completion?
 382    const generating = computed(() => controller.value != null)
 383
 384    // has the user started a chat?
 385    const chatStarted = computed(() => session.value.transcript.length > 0)
 386
 387    const transcriptUpdate = (transcript) => {
 388      session.value = {
 389        ...session.value,
 390        transcript
 391      }
 392    }
 393
 394    // simple template replace
 395    const template = (str, extraSettings) => {
 396      let settings = session.value;
 397      if (extraSettings) {
 398        settings = { ...settings, ...extraSettings };
 399      }
 400      return String(str).replaceAll(/\{\{(.*?)\}\}/g, (_, key) => template(settings[key]));
 401    }
 402
 403    async function runLlama(prompt, llamaParams, char) {
 404      const currentMessages = [];
 405      const history = session.value.transcript;
 406      if (controller.value) {
 407        throw new Error("already running");
 408      }
 409      controller.value = new AbortController();
 410      for await (const chunk of llama(prompt, llamaParams, { controller: controller.value, api_url: location.pathname.replace(/\/+$/, '') })) {
 411        const data = chunk.data;
 412
 413        if (data.stop) {
 414          while (
 415            currentMessages.length > 0 &&
 416            currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
 417          ) {
 418            currentMessages.pop();
 419          }
 420          transcriptUpdate([...history, [char, currentMessages]])
 421          console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
 422        } else {
 423          currentMessages.push(data);
 424          slot_id = data.slot_id;
 425          if (selected_image && !data.multimodal) {
 426            alert("The server was not compiled for multimodal or the model projector can't be loaded.");
 427            return;
 428          }
 429          transcriptUpdate([...history, [char, currentMessages]])
 430        }
 431
 432        if (data.timings) {
 433          llamaStats.value = data;
 434        }
 435      }
 436
 437      controller.value = null;
 438    }
 439
 440    // send message to server
 441    const chat = async (msg) => {
 442      if (controller.value) {
 443        console.log('already running...');
 444        return;
 445      }
 446
 447      transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 448
 449      let prompt = template(session.value.template, {
 450        message: msg,
 451        history: session.value.transcript.flatMap(
 452          ([name, data]) =>
 453            template(
 454              session.value.historyTemplate,
 455              {
 456                name,
 457                message: Array.isArray(data) ?
 458                  data.map(msg => msg.content).join('').replace(/^\s/, '') :
 459                  data,
 460              }
 461            )
 462        ).join("\n"),
 463      });
 464      if (selected_image) {
 465        prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
 466      }
 467      await runLlama(prompt, {
 468        ...params.value,
 469        slot_id: slot_id,
 470        stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
 471      }, "{{char}}");
 472    }
 473
 474    const runCompletion = () => {
 475      if (controller.value) {
 476        console.log('already running...');
 477        return;
 478      }
 479      const { prompt } = session.value;
 480      transcriptUpdate([...session.value.transcript, ["", prompt]]);
 481      runLlama(prompt, {
 482        ...params.value,
 483        slot_id: slot_id,
 484        stop: [],
 485      }, "").finally(() => {
 486        session.value.prompt = session.value.transcript.map(([_, data]) =>
 487          Array.isArray(data) ? data.map(msg => msg.content).join('') : data
 488        ).join('');
 489        session.value.transcript = [];
 490      })
 491    }
 492
 493    const stop = (e) => {
 494      e.preventDefault();
 495      if (controller.value) {
 496        controller.value.abort();
 497        controller.value = null;
 498      }
 499    }
 500
 501    const reset = (e) => {
 502      stop(e);
 503      transcriptUpdate([]);
 504    }
 505
 506    const uploadImage = (e) => {
 507      e.preventDefault();
 508      document.getElementById("fileInput").click();
 509      document.getElementById("fileInput").addEventListener("change", function (event) {
 510        const selectedFile = event.target.files[0];
 511        if (selectedFile) {
 512          const reader = new FileReader();
 513          reader.onload = function () {
 514            const image_data = reader.result;
 515            session.value = { ...session.value, image_selected: image_data };
 516            params.value = {
 517              ...params.value, image_data: [
 518                { data: image_data.replace(/data:image\/[^;]+;base64,/, ''), id: 10 }]
 519            }
 520          };
 521          selected_image = true;
 522          reader.readAsDataURL(selectedFile);
 523        }
 524      });
 525    }
 526
 527    function MessageInput() {
 528      const message = useSignal("")
 529
 530      const submit = (e) => {
 531        stop(e);
 532        chat(message.value);
 533        message.value = "";
 534      }
 535
 536      const enterSubmits = (event) => {
 537        if (event.which === 13 && !event.shiftKey) {
 538          submit(event);
 539        }
 540      }
 541
 542      return html`
 543        <form onsubmit=${submit}>
 544          <div>
 545            <textarea
 546               className=${generating.value ? "loading" : null}
 547               oninput=${(e) => message.value = e.target.value}
 548               onkeypress=${enterSubmits}
 549               placeholder="Say something..."
 550               rows=2
 551               type="text"
 552               value="${message}"
 553            />
 554          </div>
 555          <div class="right">
 556            <button type="submit" disabled=${generating.value}>Send</button>
 557            <button onclick=${uploadImage}>Upload Image</button>
 558            <button onclick=${stop} disabled=${!generating.value}>Stop</button>
 559            <button onclick=${reset}>Reset</button>
 560          </div>
 561        </form>
 562      `
 563    }
 564
 565    function CompletionControls() {
 566      const submit = (e) => {
 567        stop(e);
 568        runCompletion();
 569      }
 570      return html`
 571        <div>
 572          <button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
 573          <button onclick=${stop} disabled=${!generating.value}>Stop</button>
 574          <button onclick=${reset}>Reset</button>
 575        </div>`;
 576    }
 577
 578    const ChatLog = (props) => {
 579      const messages = session.value.transcript;
 580      const container = useRef(null)
 581
 582      useEffect(() => {
 583        // scroll to bottom (if needed)
 584        const parent = container.current.parentElement;
 585        if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
 586          parent.scrollTo(0, parent.scrollHeight)
 587        }
 588      }, [messages])
 589
 590      const isCompletionMode = session.value.type === 'completion'
 591      const chatLine = ([user, data], index) => {
 592        let message
 593        const isArrayMessage = Array.isArray(data)
 594        if (params.value.n_probs > 0 && isArrayMessage) {
 595          message = html`<${Probabilities} data=${data} />`
 596        } else {
 597          const text = isArrayMessage ?
 598            data.map(msg => msg.content).join('').replace(/^\s+/, '') :
 599            data;
 600          message = isCompletionMode ?
 601            text :
 602            html`<${Markdownish} text=${template(text)} />`
 603        }
 604        if (user) {
 605          return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
 606        } else {
 607          return isCompletionMode ?
 608            html`<span key=${index}>${message}</span>` :
 609            html`<p key=${index}>${message}</p>`
 610        }
 611      };
 612
 613      const handleCompletionEdit = (e) => {
 614        session.value.prompt = e.target.innerText;
 615        session.value.transcript = [];
 616      }
 617
 618      return html`
 619        <div id="chat" ref=${container} key=${messages.length}>
 620          <img style="width: 60%;${!session.value.image_selected ? `display: none;` : ``}" src="${session.value.image_selected}"/>
 621          <span contenteditable=${isCompletionMode} ref=${container} oninput=${handleCompletionEdit}>
 622            ${messages.flatMap(chatLine)}
 623          </span>
 624        </div>`;
 625    };
 626
 627    const ConfigForm = (props) => {
 628      const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value }
 629      const updateParams = (el) => params.value = { ...params.value, [el.target.name]: el.target.value }
 630      const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
 631      const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }
 632      const updateParamsBool = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked }
 633
 634      const grammarJsonSchemaPropOrder = signal('')
 635      const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
 636      const convertJSONSchemaGrammar = async () => {
 637        try {
 638          let schema = JSON.parse(params.value.grammar)
 639          const converter = new SchemaConverter({
 640            prop_order: grammarJsonSchemaPropOrder.value
 641              .split(',')
 642              .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
 643            allow_fetch: true,
 644          })
 645          schema = await converter.resolveRefs(schema, 'input')
 646          converter.visit(schema, '')
 647          params.value = {
 648            ...params.value,
 649            grammar: converter.formatGrammar(),
 650          }
 651        } catch (e) {
 652          alert(`Convert failed: ${e.message}`)
 653        }
 654      }
 655
 656      const FloatField = ({ label, max, min, name, step, value }) => {
 657        return html`
 658          <div>
 659            <label for="${name}">${label}</label>
 660            <input type="range" id="${name}" min="${min}" max="${max}" step="${step}" name="${name}" value="${value}" oninput=${updateParamsFloat} />
 661            <span>${value}</span>
 662          </div>
 663        `
 664      };
 665
 666      const IntField = ({ label, max, min, name, value }) => {
 667        return html`
 668          <div>
 669            <label for="${name}">${label}</label>
 670            <input type="range" id="${name}" min="${min}" max="${max}" name="${name}" value="${value}" oninput=${updateParamsInt} />
 671            <span>${value}</span>
 672          </div>
 673        `
 674      };
 675
 676      const BoolField = ({ label, name, value }) => {
 677        return html`
 678          <div>
 679            <label for="${name}">${label}</label>
 680            <input type="checkbox" id="${name}" name="${name}" checked="${value}" onclick=${updateParamsBool} />
 681          </div>
 682        `
 683      };
 684
 685      const userTemplateReset = (e) => {
 686        e.preventDefault();
 687        userTemplateResetToDefaultAndApply()
 688      }
 689
 690      const UserTemplateResetButton = () => {
 691        if (selectedUserTemplate.value.name == 'default') {
 692          return html`
 693            <button disabled>Using default template</button>
 694          `
 695        }
 696
 697        return html`
 698          <button onclick=${userTemplateReset}>Reset all to default</button>
 699        `
 700      };
 701
 702      useEffect(() => {
 703        // autosave template on every change
 704        userTemplateAutosave()
 705      }, [session.value, params.value])
 706
 707      const GrammarControl = () => (
 708        html`
 709          <div>
 710            <label for="template">Grammar</label>
 711            <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
 712            <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
 713            <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
 714          </div>
 715          `
 716      );
 717
 718      const PromptControlFieldSet = () => (
 719        html`
 720        <fieldset>
 721          <div>
 722            <label htmlFor="prompt">Prompt</label>
 723            <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
 724          </div>
 725        </fieldset>
 726        `
 727      );
 728
 729      const ChatConfigForm = () => (
 730        html`
 731          ${PromptControlFieldSet()}
 732
 733          <fieldset class="two">
 734            <div>
 735              <label for="user">User name</label>
 736              <input type="text" name="user" value="${session.value.user}" oninput=${updateSession} />
 737            </div>
 738
 739            <div>
 740              <label for="bot">Bot name</label>
 741              <input type="text" name="char" value="${session.value.char}" oninput=${updateSession} />
 742            </div>
 743          </fieldset>
 744
 745          <fieldset>
 746            <div>
 747              <label for="template">Prompt template</label>
 748              <textarea id="template" name="template" value="${session.value.template}" rows=4 oninput=${updateSession}/>
 749            </div>
 750
 751            <div>
 752              <label for="template">Chat history template</label>
 753              <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
 754            </div>
 755            ${GrammarControl()}
 756          </fieldset>
 757      `
 758      );
 759
 760      const CompletionConfigForm = () => (
 761        html`
 762          ${PromptControlFieldSet()}
 763          <fieldset>${GrammarControl()}</fieldset>
 764        `
 765      );
 766
 767      return html`
 768        <form>
 769          <fieldset class="two">
 770            <${UserTemplateResetButton}/>
 771            <div>
 772              <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
 773              <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
 774            </div>
 775          </fieldset>
 776
 777          ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
 778
 779          <fieldset class="two">
 780            ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
 781            ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
 782            ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
 783            ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
 784            ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
 785            ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
 786            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
 787          </fieldset>
 788          <details>
 789            <summary>More options</summary>
 790            <fieldset class="two">
 791              ${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
 792              ${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
 793              ${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
 794            </fieldset>
 795            <hr />
 796            <fieldset class="three">
 797              <div>
 798                <label><input type="radio" name="mirostat" value="0" checked=${params.value.mirostat == 0} oninput=${updateParamsInt} /> no Mirostat</label>
 799                <label><input type="radio" name="mirostat" value="1" checked=${params.value.mirostat == 1} oninput=${updateParamsInt} /> Mirostat v1</label>
 800                <label><input type="radio" name="mirostat" value="2" checked=${params.value.mirostat == 2} oninput=${updateParamsInt} /> Mirostat v2</label>
 801              </div>
 802              ${FloatField({ label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau })}
 803              ${FloatField({ label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta })}
 804            </fieldset>
 805            <fieldset>
 806              ${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
 807            </fieldset>
 808            <fieldset>
 809              ${IntField({ label: "Min Probabilities from each Sampler", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
 810            </fieldset>
 811            <fieldset>
 812              <label for="api_key">API Key</label>
 813              <input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
 814            </fieldset>
 815          </details>
 816        </form>
 817      `
 818    }
 819
 820    const probColor = (p) => {
 821      const r = Math.floor(192 * (1 - p));
 822      const g = Math.floor(192 * p);
 823      return `rgba(${r},${g},0,0.3)`;
 824    }
 825
 826    const Probabilities = (params) => {
 827      return params.data.map(msg => {
 828        const { completion_probabilities } = msg;
 829        if (
 830          !completion_probabilities ||
 831          completion_probabilities.length === 0
 832        ) return msg.content
 833
 834        if (completion_probabilities.length > 1) {
 835          // Not for byte pair
 836          if (completion_probabilities[0].content.startsWith('byte: \\')) return msg.content
 837
 838          const splitData = completion_probabilities.map(prob => ({
 839            content: prob.content,
 840            completion_probabilities: [prob]
 841          }))
 842          return html`<${Probabilities} data=${splitData} />`
 843        }
 844
 845        const { probs, content } = completion_probabilities[0]
 846        const found = probs.find(p => p.tok_str === msg.content)
 847        const pColor = found ? probColor(found.prob) : 'transparent'
 848
 849        const popoverChildren = html`
 850          <div class="prob-set">
 851            ${probs.map((p, index) => {
 852          return html`
 853                <div
 854                  key=${index}
 855                  title=${`prob: ${p.prob}`}
 856                  style=${{
 857              padding: '0.3em',
 858              backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
 859            }}
 860                >
 861                  <span>${p.tok_str}: </span>
 862                  <span>${Math.floor(p.prob * 100)}%</span>
 863                </div>
 864              `
 865        })}
 866          </div>
 867        `
 868
 869        return html`
 870          <${Popover} style=${{ backgroundColor: pColor }} popoverChildren=${popoverChildren}>
 871            ${msg.content.match(/\n/gim) ? html`<br />` : msg.content}
 872          </>
 873        `
 874      });
 875    }
 876
 877    // poor mans markdown replacement
 878    const Markdownish = (params) => {
 879      const md = params.text
 880        .replace(/&/g, '&amp;')
 881        .replace(/</g, '&lt;')
 882        .replace(/>/g, '&gt;')
 883        .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
 884        .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
 885        .replace(/__(.*?)__/g, '<strong>$1</strong>')
 886        .replace(/\*(.*?)\*/g, '<em>$1</em>')
 887        .replace(/_(.*?)_/g, '<em>$1</em>')
 888        .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
 889        .replace(/`(.*?)`/g, '<code>$1</code>')
 890        .replace(/\n/gim, '<br />');
 891      return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
 892    };
 893
 894    const ModelGenerationInfo = (params) => {
 895      if (!llamaStats.value) {
 896        return html`<span/>`
 897      }
 898      return html`
 899        <span>
 900          ${llamaStats.value.tokens_predicted} predicted, ${llamaStats.value.tokens_cached} cached, ${llamaStats.value.timings.predicted_per_token_ms.toFixed()}ms per token, ${llamaStats.value.timings.predicted_per_second.toFixed(2)} tokens per second
 901        </span>
 902      `
 903    }
 904
 905    // simple popover impl
 906    const Popover = (props) => {
 907      const isOpen = useSignal(false);
 908      const position = useSignal({ top: '0px', left: '0px' });
 909      const buttonRef = useRef(null);
 910      const popoverRef = useRef(null);
 911
 912      const togglePopover = () => {
 913        if (buttonRef.current) {
 914          const rect = buttonRef.current.getBoundingClientRect();
 915          position.value = {
 916            top: `${rect.bottom + window.scrollY}px`,
 917            left: `${rect.left + window.scrollX}px`,
 918          };
 919        }
 920        isOpen.value = !isOpen.value;
 921      };
 922
 923      const handleClickOutside = (event) => {
 924        if (popoverRef.current && !popoverRef.current.contains(event.target) && !buttonRef.current.contains(event.target)) {
 925          isOpen.value = false;
 926        }
 927      };
 928
 929      useEffect(() => {
 930        document.addEventListener('mousedown', handleClickOutside);
 931        return () => {
 932          document.removeEventListener('mousedown', handleClickOutside);
 933        };
 934      }, []);
 935
 936      return html`
 937        <span style=${props.style} ref=${buttonRef} onClick=${togglePopover}>${props.children}</span>
 938        ${isOpen.value && html`
 939          <${Portal} into="#portal">
 940            <div
 941              ref=${popoverRef}
 942              class="popover-content"
 943              style=${{
 944            top: position.value.top,
 945            left: position.value.left,
 946          }}
 947            >
 948              ${props.popoverChildren}
 949            </div>
 950          </${Portal}>
 951        `}
 952      `;
 953    };
 954
    // Source: preact-portal (https://github.com/developit/preact-portal/blob/master/src/preact-portal.js)
    /**
     * Redirect rendering of descendants into the given CSS selector.
     *
     * The component itself renders nothing (render() returns null); its
     * children are rendered separately, wrapped in a PortalProxy, into the
     * node designated by the `into` prop (a selector string or a node).
     */
    class Portal extends Component {
      // `props` is the first lifecycle argument (the previous props in Preact's
      // componentDidUpdate signature). If any of its keys differs from the
      // current props, the layer is re-rendered asynchronously via setTimeout.
      componentDidUpdate(props) {
        for (let i in props) {
          if (props[i] !== this.props[i]) {
            return setTimeout(this.renderLayer);
          }
        }
      }

      // Marks the instance as mounted, binds renderLayer (it is later passed
      // bare to setTimeout) and performs the first render of the layer.
      componentDidMount() {
        this.isMounted = true;
        this.renderLayer = this.renderLayer.bind(this);
        this.renderLayer();
      }

      // Order matters: the layer is rendered empty (show = false) while
      // isMounted is still true, because renderLayer is a no-op afterwards.
      // Finally the remote node, if any, is detached from its parent.
      componentWillUnmount() {
        this.renderLayer(false);
        this.isMounted = false;
        if (this.remote && this.remote.parentNode) this.remote.parentNode.removeChild(this.remote);
      }

      // Resolves a selector string through document.querySelector; any other
      // value is returned unchanged.
      findNode(node) {
        return typeof node === 'string' ? document.querySelector(node) : node;
      }

      // Renders the children (or nothing when `show` is false) into the target.
      // NOTE(review): this stores render()'s return value in this.remote and
      // passes it back as render()'s third argument; what render() returns and
      // how it treats that argument depends on the Preact version - confirm.
      renderLayer(show = true) {
        if (!this.isMounted) return;

        // clean up old node if moving bases:
        // when the `into` prop changed, an empty PortalProxy is rendered into
        // the previous target first, then the new target is resolved.
        if (this.props.into !== this.intoPointer) {
          this.intoPointer = this.props.into;
          if (this.into && this.remote) {
            this.remote = render(html`<${PortalProxy} />`, this.into, this.remote);
          }
          this.into = this.findNode(this.props.into);
        }

        this.remote = render(html`
          <${PortalProxy} context=${this.context}>
            ${show && this.props.children || null}
          </${PortalProxy}>
        `, this.into, this.remote);
      }

      // Nothing is rendered in place; all output goes through renderLayer.
      render() {
        return null;
      }
    }
1005    // high-order component that renders its first child if it exists.
1006    // used as a conditional rendering proxy.
1007    class PortalProxy extends Component {
1008      getChildContext() {
1009        return this.props.context;
1010      }
1011      render({ children }) {
1012        return children || null;
1013      }
1014    }
1015
1016    function App(props) {
1017      useEffect(() => {
1018        const query = new URLSearchParams(location.search).get("q");
1019        if (query) chat(query);
1020      }, []);
1021
1022      return html`
1023        <div class="mode-${session.value.type}">
1024          <header>
1025            <img src="llama_cpp.png" style="width:100%"/>
1026          </header>
1027
1028          <section id="write">
1029            <${session.value.type === 'chat' ? MessageInput : CompletionControls} />
1030          </section>
1031
1032          <main id="content">
1033            <${chatStarted.value ? ChatLog : ConfigForm} />
1034          </main>
1035
1036
1037          <footer>
1038            <p><${ModelGenerationInfo} /></p>
1039            <p>Powered by <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
1040          </footer>
1041        </div>
1042      `;
1043    }
1044
    // Mount the App component into the element with id "container".
    render(h(App), document.querySelector('#container'));
1046  </script>
1047</head>
1048
1049<body>
1050  <div id="container">
1051    <input type="file" id="fileInput" accept="image/*" style="display: none;">
1052  </div>
1053  <div id="portal"></div>
1054</body>
1055
1056</html>