archive llama.cpp-b8008.tar.gz
corpus lotr.txt map1_bromm.txt map1_dagna.txt map1_keldor.txt map1_skara.txt map1_thrain.txt
llama.cpp
.devops
nix apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix
cann.Dockerfile cpu.Dockerfile cuda-new.Dockerfile cuda.Dockerfile intel.Dockerfile llama-cli-cann.Dockerfile llama-cpp-cuda.srpm.spec llama-cpp.srpm.spec musa.Dockerfile rocm.Dockerfile s390x.Dockerfile tools.sh vulkan.Dockerfile
.gemini settings.json
.github
ISSUE_TEMPLATE 010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.yml
actions
get-tag-name action.yml
install-exe action.yml
linux-setup-spacemit action.yml
linux-setup-vulkan action.yml
unarchive-tar action.yml
windows-setup-cuda action.yml
windows-setup-rocm action.yml
workflows bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.yml
labeler.yml pull_request_template.md
benches
dgx-spark aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json dgx-spark.md
mac-m2-ultra mac-m2-ultra.md
ci README-MUSA.md README.md run.sh
cmake arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmake
common
jinja README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.h
CMakeLists.txt arg.cpp arg.h base64.hpp build-info.cpp.in chat-parser-xml-toolcall.cpp chat-parser-xml-toolcall.h chat-parser.cpp chat-parser.h chat-peg-parser.cpp chat-peg-parser.h chat.cpp chat.h common.cpp common.h console.cpp console.h debug.cpp debug.h download.cpp download.h http.h json-partial.cpp json-partial.h json-schema-to-grammar.cpp json-schema-to-grammar.h llguidance.cpp log.cpp log.h ngram-cache.cpp ngram-cache.h ngram-map.cpp ngram-map.h ngram-mod.cpp ngram-mod.h peg-parser.cpp peg-parser.h preset.cpp preset.h regex-partial.cpp regex-partial.h sampling.cpp sampling.h speculative.cpp speculative.h unicode.cpp unicode.h
docs
android imported-into-android-studio.jpg
backend
VirtGPU configuration.md development.md
snapdragon CMakeUserPresets.json README.md developer.md windows.md
BLIS.md CANN.md CUDA-FEDORA.md OPENCL.md SYCL.md VirtGPU.md ZenDNN.md zDNN.md
development
llama-star idea-arch.key idea-arch.pdf
HOWTO-add-model.md debugging-tests.md parsing.md token_generation_performance_tips.md
multimodal MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.md
ops BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csv
android.md build-riscv64-spacemit.md build-s390x.md build.md docker.md function-calling.md install.md llguidance.md multimodal.md ops.md preset.md speculative.md
examples
batched CMakeLists.txt README.md batched.cpp
batched.swift
Sources main.swift
.gitignore Makefile Package.swift README.md
convert-llama2c-to-ggml CMakeLists.txt README.md convert-llama2c-to-ggml.cpp
debug CMakeLists.txt README.md debug.cpp
deprecation-warning README.md deprecation-warning.cpp
diffusion CMakeLists.txt README.md diffusion-cli.cpp
embedding CMakeLists.txt README.md embedding.cpp
eval-callback CMakeLists.txt README.md eval-callback.cpp
gen-docs CMakeLists.txt gen-docs.cpp
gguf CMakeLists.txt gguf.cpp
gguf-hash
deps
rotate-bits package.json rotate-bits.h
sha1 package.json sha1.c sha1.h
sha256 package.json sha256.c sha256.h
xxhash clib.json xxhash.c xxhash.h
CMakeLists.txt README.md gguf-hash.cpp
idle CMakeLists.txt README.md idle.cpp
llama.android
app
src
main
java
com
example
llama MainActivity.kt MessageAdapter.kt
res
drawable bg_assistant_message.xml bg_user_message.xml ic_launcher_background.xml ic_launcher_foreground.xml outline_folder_open_24.xml outline_send_24.xml
layout activity_main.xml item_message_assistant.xml item_message_user.xml
mipmap-anydpi ic_launcher.xml ic_launcher_round.xml
mipmap-hdpi ic_launcher.webp ic_launcher_round.webp
mipmap-mdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxhdpi ic_launcher.webp ic_launcher_round.webp
mipmap-xxxhdpi ic_launcher.webp ic_launcher_round.webp
values colors.xml strings.xml themes.xml
xml backup_rules.xml data_extraction_rules.xml
AndroidManifest.xml
.gitignore build.gradle.kts proguard-rules.pro
gradle
wrapper gradle-wrapper.jar gradle-wrapper.properties
libs.versions.toml
lib
src
androidTest
java
android
llama
cpp ExampleInstrumentedTest.kt
main
cpp CMakeLists.txt ai_chat.cpp logging.h
java
com
arm
aichat
gguf FileType.kt GgufMetadata.kt GgufMetadataReader.kt
internal
gguf GgufMetadataReaderImpl.kt
InferenceEngineImpl.kt
AiChat.kt InferenceEngine.kt
AndroidManifest.xml
test
java
android
llama
cpp ExampleUnitTest.kt
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.pro
.gitignore build.gradle.kts gradle.properties gradlew settings.gradle.kts
llama.swiftui
llama.cpp.swift LibLlama.swift
llama.swiftui
Assets.xcassets
AppIcon.appiconset Contents.json
Contents.json
Models LlamaState.swift
Resources
models .gitignore
UI ContentView.swift DownloadButton.swift InputButton.swift LoadCustomButton.swift
llama_swiftuiApp.swift
llama.swiftui.xcodeproj
project.xcworkspace contents.xcworkspacedata
project.pbxproj
.gitignore README.md
lookahead CMakeLists.txt README.md lookahead.cpp
lookup CMakeLists.txt README.md lookup-create.cpp lookup-merge.cpp lookup-stats.cpp lookup.cpp
model-conversion
scripts
causal compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.py
embedding compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.py
utils __init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.py
.gitignore Makefile README.md requirements.txt
parallel CMakeLists.txt README.md parallel.cpp
passkey CMakeLists.txt README.md passkey.cpp
retrieval CMakeLists.txt README.md retrieval.cpp
save-load-state CMakeLists.txt save-load-state.cpp
simple CMakeLists.txt README.md simple.cpp
simple-chat CMakeLists.txt README.md simple-chat.cpp
simple-cmake-pkg .gitignore CMakeLists.txt README.md
speculative CMakeLists.txt README.md speculative.cpp
speculative-simple CMakeLists.txt README.md speculative-simple.cpp
sycl CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.bat
training CMakeLists.txt README.md finetune.cpp
CMakeLists.txt convert_legacy_llama.py json_schema_pydantic_example.py json_schema_to_grammar.py llama.vim pydantic_models_to_grammar.py pydantic_models_to_grammar_examples.py reason-act.sh regex_to_grammar.py server-llama2-13B.sh server_embd.py ts-type-to-grammar.sh
ggml
cmake GitVars.cmake common.cmake ggml-config.cmake.in
include ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.h
src
ggml-blas CMakeLists.txt ggml-blas.cpp
ggml-cann CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cpp
ggml-cpu
amx amx.cpp amx.h common.h mmq.cpp mmq.h
arch
arm cpu-feats.cpp quants.c repack.cpp
loongarch quants.c
powerpc cpu-feats.cpp quants.c
riscv cpu-feats.cpp quants.c repack.cpp
s390 cpu-feats.cpp quants.c
wasm quants.c
x86 cpu-feats.cpp quants.c repack.cpp
cmake FindSIMD.cmake
kleidiai kernels.cpp kernels.h kleidiai.cpp kleidiai.h
llamafile sgemm-ppc.h sgemm.cpp sgemm.h
spacemit ime.cpp ime.h ime1_kernels.cpp ime_kernels.h
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.h
ggml-cuda
template-instances fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu 
fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cu
vendors cuda.h hip.h musa.h
CMakeLists.txt acc.cu acc.cuh add-id.cu add-id.cuh arange.cu arange.cuh argmax.cu argmax.cuh argsort.cu argsort.cuh binbcast.cu binbcast.cuh clamp.cu clamp.cuh common.cuh concat.cu concat.cuh conv-transpose-1d.cu conv-transpose-1d.cuh conv2d-dw.cu conv2d-dw.cuh conv2d-transpose.cu conv2d-transpose.cuh conv2d.cu conv2d.cuh convert.cu convert.cuh count-equal.cu count-equal.cuh cp-async.cuh cpy-utils.cuh cpy.cu cpy.cuh cross-entropy-loss.cu cross-entropy-loss.cuh cumsum.cu cumsum.cuh dequantize.cuh diag.cu diag.cuh diagmask.cu diagmask.cuh fattn-common.cuh fattn-mma-f16.cuh fattn-tile.cu fattn-tile.cuh fattn-vec.cuh fattn-wmma-f16.cu fattn-wmma-f16.cuh fattn.cu fattn.cuh fill.cu fill.cuh getrows.cu getrows.cuh ggml-cuda.cu gla.cu gla.cuh im2col.cu im2col.cuh mean.cu mean.cuh mma.cuh mmf.cu mmf.cuh mmid.cu mmid.cuh mmq.cu mmq.cuh mmvf.cu mmvf.cuh mmvq.cu mmvq.cuh norm.cu norm.cuh opt-step-adamw.cu opt-step-adamw.cuh opt-step-sgd.cu opt-step-sgd.cuh out-prod.cu out-prod.cuh pad.cu pad.cuh pad_reflect_1d.cu pad_reflect_1d.cuh pool2d.cu pool2d.cuh quantize.cu quantize.cuh reduce_rows.cuh roll.cu roll.cuh rope.cu rope.cuh scale.cu scale.cuh set-rows.cu set-rows.cuh set.cu set.cuh softcap.cu softcap.cuh softmax.cu softmax.cuh solve_tri.cu solve_tri.cuh ssm-conv.cu ssm-conv.cuh ssm-scan.cu ssm-scan.cuh sum.cu sum.cuh sumrows.cu sumrows.cuh top-k.cu top-k.cuh topk-moe.cu topk-moe.cuh tri.cu tri.cuh tsembd.cu tsembd.cuh unary.cu unary.cuh upscale.cu upscale.cuh vecdotq.cuh wkv.cu wkv.cuh
ggml-hexagon
htp CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.h
CMakeLists.txt ggml-hexagon.cpp htp-drv.cpp htp-drv.h libdl.h libggml-htp.inf op-desc.h
ggml-hip CMakeLists.txt
ggml-metal CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metal
ggml-musa CMakeLists.txt mudnn.cu mudnn.cuh
ggml-opencl
kernels add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.cl
CMakeLists.txt ggml-opencl.cpp
ggml-rpc CMakeLists.txt ggml-rpc.cpp
ggml-sycl
dpct helper.hpp
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hpp
ggml-virtgpu
backend
shared api_remoting.h apir_backend.gen.h apir_backend.h apir_cs.h apir_cs_ggml.h apir_cs_rpc.h
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cpp
include apir_hw.h
CMakeLists.txt apir_cs_ggml-rpc-front.cpp ggml-backend-buffer-type.cpp ggml-backend-buffer.cpp ggml-backend-device.cpp ggml-backend-reg.cpp ggml-backend.cpp ggml-remoting.h ggmlremoting_functions.yaml regenerate_remoting.py virtgpu-apir.h virtgpu-forward-backend.cpp virtgpu-forward-buffer-type.cpp virtgpu-forward-buffer.cpp virtgpu-forward-device.cpp virtgpu-forward-impl.h virtgpu-forward.gen.h virtgpu-shm.cpp virtgpu-shm.h virtgpu-utils.cpp virtgpu-utils.h virtgpu.cpp virtgpu.h
ggml-vulkan
cmake host-toolchain.cmake.in
vulkan-shaders
feature-tests bfloat16.comp coopmat.comp coopmat2.comp integer_dot.comp
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp 
opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.comp
CMakeLists.txt ggml-vulkan.cpp
ggml-webgpu
wgsl-shaders argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgsl
CMakeLists.txt ggml-webgpu-shader-lib.hpp ggml-webgpu.cpp pre_wgsl.hpp
ggml-zdnn .gitignore CMakeLists.txt common.hpp ggml-zdnn.cpp mmf.cpp mmf.hpp utils.cpp utils.hpp
ggml-zendnn CMakeLists.txt ggml-zendnn.cpp
CMakeLists.txt ggml-alloc.c ggml-backend-dl.cpp ggml-backend-dl.h ggml-backend-impl.h ggml-backend-reg.cpp ggml-backend.cpp ggml-common.h ggml-impl.h ggml-opt.cpp ggml-quants.c ggml-quants.h ggml-threading.cpp ggml-threading.h ggml.c ggml.cpp gguf.cpp
.gitignore CMakeLists.txt
gguf-py
examples reader.py writer.py
gguf
scripts gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.py
__init__.py constants.py gguf.py gguf_reader.py gguf_writer.py lazy.py metadata.py py.typed quants.py tensor_mapping.py utility.py vocab.py
tests __init__.py test_metadata.py test_quants.py
LICENSE README.md pyproject.toml
grammars README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnf
include llama-cpp.h llama.h
licenses LICENSE-jsonhpp
media llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svg
models
templates Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinja
.editorconfig ggml-vocab-aquila.gguf ggml-vocab-baichuan.gguf ggml-vocab-bert-bge.gguf ggml-vocab-bert-bge.gguf.inp ggml-vocab-bert-bge.gguf.out ggml-vocab-command-r.gguf ggml-vocab-command-r.gguf.inp ggml-vocab-command-r.gguf.out ggml-vocab-deepseek-coder.gguf ggml-vocab-deepseek-coder.gguf.inp ggml-vocab-deepseek-coder.gguf.out ggml-vocab-deepseek-llm.gguf ggml-vocab-deepseek-llm.gguf.inp ggml-vocab-deepseek-llm.gguf.out ggml-vocab-falcon.gguf ggml-vocab-falcon.gguf.inp ggml-vocab-falcon.gguf.out ggml-vocab-gpt-2.gguf ggml-vocab-gpt-2.gguf.inp ggml-vocab-gpt-2.gguf.out ggml-vocab-gpt-neox.gguf ggml-vocab-llama-bpe.gguf ggml-vocab-llama-bpe.gguf.inp ggml-vocab-llama-bpe.gguf.out ggml-vocab-llama-spm.gguf ggml-vocab-llama-spm.gguf.inp ggml-vocab-llama-spm.gguf.out ggml-vocab-mpt.gguf ggml-vocab-mpt.gguf.inp ggml-vocab-mpt.gguf.out ggml-vocab-nomic-bert-moe.gguf ggml-vocab-phi-3.gguf ggml-vocab-phi-3.gguf.inp ggml-vocab-phi-3.gguf.out ggml-vocab-qwen2.gguf ggml-vocab-qwen2.gguf.inp ggml-vocab-qwen2.gguf.out ggml-vocab-refact.gguf ggml-vocab-refact.gguf.inp ggml-vocab-refact.gguf.out ggml-vocab-starcoder.gguf ggml-vocab-starcoder.gguf.inp ggml-vocab-starcoder.gguf.out
pocs
vdot CMakeLists.txt q8dot.cpp vdot.cpp
CMakeLists.txt
requirements requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txt
scripts
apple validate-apps.sh validate-ios.sh validate-macos.sh validate-tvos.sh validate-visionos.sh
jinja jinja-tester.py requirements.txt
snapdragon
adb llama-cli.farf run-bench.sh run-cli.sh run-completion.sh run-mtmd.sh run-tool.sh
qdc
tests test_bench.py
readme.md requirements.txt
windows run-bench.ps1 run-cli.ps1 run-tool.ps1 setup-build.ps1
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmake
src
models afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpp
CMakeLists.txt llama-adapter.cpp llama-adapter.h llama-arch.cpp llama-arch.h llama-batch.cpp llama-batch.h llama-chat.cpp llama-chat.h llama-context.cpp llama-context.h llama-cparams.cpp llama-cparams.h llama-grammar.cpp llama-grammar.h llama-graph.cpp llama-graph.h llama-hparams.cpp llama-hparams.h llama-impl.cpp llama-impl.h llama-io.cpp llama-io.h llama-kv-cache-iswa.cpp llama-kv-cache-iswa.h llama-kv-cache.cpp llama-kv-cache.h llama-kv-cells.h llama-memory-hybrid-iswa.cpp llama-memory-hybrid-iswa.h llama-memory-hybrid.cpp llama-memory-hybrid.h llama-memory-recurrent.cpp llama-memory-recurrent.h llama-memory.cpp llama-memory.h llama-mmap.cpp llama-mmap.h llama-model-loader.cpp llama-model-loader.h llama-model-saver.cpp llama-model-saver.h llama-model.cpp llama-model.h llama-quant.cpp llama-quant.h llama-sampler.cpp llama-sampler.h llama-vocab.cpp llama-vocab.h llama.cpp unicode-data.cpp unicode-data.h unicode.cpp unicode.h
tests
peg-parser simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.h
.gitignore CMakeLists.txt get-model.cpp get-model.h run-json-schema-to-grammar.mjs test-alloc.cpp test-arg-parser.cpp test-autorelease.cpp test-backend-ops.cpp test-backend-sampler.cpp test-barrier.cpp test-c.c test-chat-parser.cpp test-chat-peg-parser.cpp test-chat-template.cpp test-chat.cpp test-double-float.cpp test-gbnf-validator.cpp test-gguf.cpp test-grammar-integration.cpp test-grammar-llguidance.cpp test-grammar-parser.cpp test-jinja.cpp test-json-partial.cpp test-json-schema-to-grammar.cpp test-llama-grammar.cpp test-log.cpp test-lora-conversion-inference.sh test-model-load-cancel.cpp test-mtmd-c-api.c test-opt.cpp test-peg-parser.cpp test-quantize-fns.cpp test-quantize-perf.cpp test-quantize-stats.cpp test-regex-partial.cpp test-rope.cpp test-sampling.cpp test-state-restore-fragmented.cpp test-thread-safety.cpp test-tokenizer-0.cpp test-tokenizer-0.py test-tokenizer-0.sh test-tokenizer-1-bpe.cpp test-tokenizer-1-spm.cpp test-tokenizer-random.py test-tokenizers-repo.sh testing.h
tools
batched-bench CMakeLists.txt README.md batched-bench.cpp
cli CMakeLists.txt README.md cli.cpp
completion CMakeLists.txt README.md completion.cpp
cvector-generator CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txt
export-lora CMakeLists.txt README.md export-lora.cpp
fit-params CMakeLists.txt README.md fit-params.cpp
gguf-split CMakeLists.txt README.md gguf-split.cpp tests.sh
imatrix CMakeLists.txt README.md imatrix.cpp
llama-bench CMakeLists.txt README.md llama-bench.cpp
mtmd
legacy-models convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.py
models cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cpp
CMakeLists.txt README.md clip-graph.h clip-impl.h clip-model.h clip.cpp clip.h deprecation-warning.cpp mtmd-audio.cpp mtmd-audio.h mtmd-cli.cpp mtmd-helper.cpp mtmd-helper.h mtmd.cpp mtmd.h requirements.txt test-1.jpeg test-2.mp3 tests.sh
perplexity CMakeLists.txt README.md perplexity.cpp
quantize CMakeLists.txt README.md quantize.cpp tests.sh
rpc CMakeLists.txt README.md rpc-server.cpp
server
bench README.md bench.py prometheus.yml requirements.txt script.js
public index.html.gz loading.html
public_legacy colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.css
public_simplechat datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjs
tests
unit test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.py
.gitignore README.md conftest.py pytest.ini requirements.txt tests.sh utils.py
themes
buttons-top README.md buttons_top.png favicon.ico index.html
wild README.md favicon.ico index.html llama_cpp.png llamapattern.png wild.png
README.md
webui
.storybook ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.ts
docs
architecture high-level-architecture-simplified.md high-level-architecture.md
flows chat-flow.md conversations-flow.md data-flow-simplified-model-mode.md data-flow-simplified-router-mode.md database-flow.md models-flow.md server-flow.md settings-flow.md
scripts dev.sh install-git-hooks.sh post-build.sh
src
lib
components
app
chat
ChatAttachments ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelte
ChatForm
ChatFormActions ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelte
ChatForm.svelte ChatFormFileInputInvisible.svelte ChatFormHelperText.svelte ChatFormTextarea.svelte
ChatMessages ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelte
ChatScreen ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.svelte
ChatSettings ChatSettings.svelte ChatSettingsFields.svelte ChatSettingsFooter.svelte ChatSettingsImportExportTab.svelte ChatSettingsParameterSourceIndicator.svelte
ChatSidebar ChatSidebar.svelte ChatSidebarActions.svelte ChatSidebarConversationItem.svelte ChatSidebarSearch.svelte handle-mobile-sidebar-item-click.ts
dialogs DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.svelte
misc ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelte
models ModelBadge.svelte ModelsSelector.svelte
server ServerErrorSplash.svelte ServerLoadingSplash.svelte ServerStatus.svelte
index.ts
ui
alert alert-description.svelte alert-title.svelte alert.svelte index.ts
alert-dialog alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.ts
badge badge.svelte index.ts
button button.svelte index.ts
card card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.ts
checkbox checkbox.svelte index.ts
collapsible collapsible-content.svelte collapsible-trigger.svelte collapsible.svelte index.ts
dialog dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.ts
dropdown-menu dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.ts
input index.ts input.svelte
label index.ts label.svelte
popover index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelte
scroll-area index.ts scroll-area-scrollbar.svelte scroll-area.svelte
select index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.svelte
separator index.ts separator.svelte
sheet index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.svelte
sidebar constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.svelte
skeleton index.ts skeleton.svelte
switch index.ts switch.svelte
table index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelte
textarea index.ts textarea.svelte
tooltip index.ts tooltip-content.svelte tooltip-trigger.svelte
utils.ts
constants auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.ts
enums attachment.ts chat.ts files.ts index.ts model.ts server.ts
hooks is-mobile.svelte.ts use-model-change-validation.svelte.ts use-processing-state.svelte.ts
markdown enhance-code-blocks.ts enhance-links.ts literal-html.ts table-html-restorer.ts
services chat.ts database.ts index.ts models.ts parameter-sync.spec.ts parameter-sync.ts props.ts
stores chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.ts
types api.d.ts chat.d.ts database.d.ts index.ts models.d.ts settings.d.ts
utils api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.ts
routes
chat
[id] +page.svelte +page.ts
+error.svelte +layout.svelte +page.svelte +page.ts
styles katex-custom.scss
app.css app.d.ts app.html
static favicon.svg loading.html
tests
client
components TestWrapper.svelte
page.svelte.test.ts
e2e demo.test.ts
stories
fixtures
assets 1.jpg beautiful-flowers-lotus.webp example.pdf hf-logo.svg
ai-tutorial.ts api-docs.ts blog-post.ts data-analysis.ts empty.ts math-formulas.ts readme.ts storybook-mocks.ts
ChatForm.stories.svelte ChatMessage.stories.svelte ChatSettings.stories.svelte ChatSidebar.stories.svelte Introduction.mdx MarkdownContent.stories.svelte
unit clipboard.test.ts latex-protection.test.ts model-names.test.ts
.gitignore .npmrc .prettierignore .prettierrc README.md components.json eslint.config.js package-lock.json package.json playwright.config.ts svelte.config.js tsconfig.json vite.config.ts vitest-setup-client.ts
CMakeLists.txt README-dev.md README.md chat-llama2.sh chat.mjs chat.sh server-common.cpp server-common.h server-context.cpp server-context.h server-http.cpp server-http.h server-models.cpp server-models.h server-queue.cpp server-queue.h server-task.cpp server-task.h server.cpp
tokenize CMakeLists.txt tokenize.cpp
tts CMakeLists.txt README.md convert_pt_to_hf.py tts-outetts.py tts.cpp
CMakeLists.txt
vendor
cpp-httplib CMakeLists.txt LICENSE httplib.cpp httplib.h
miniaudio miniaudio.h
nlohmann json.hpp json_fwd.hpp
sheredom subprocess.h
stb stb_image.h
.clang-format .clang-tidy .dockerignore .ecrc .editorconfig .flake8 .gitignore .gitmodules .pre-commit-config.yaml AGENTS.md AUTHORS CLAUDE.md CMakeLists.txt CMakePresets.json CODEOWNERS CONTRIBUTING.md LICENSE Makefile README.md SECURITY.md convert_hf_to_gguf.py convert_hf_to_gguf_update.py convert_llama_ggml_to_gguf.py convert_lora_to_gguf.py flake.lock flake.nix mypy.ini poetry.lock pyproject.toml pyrightconfig.json requirements.txt
maps map1.h map1.txt
papers 2310.11703v2.pdf 2405.14159v2.pdf
prompts lotr.h lotr.txt
.gitignore Dockerfile Makefile README.md compile_flags.txt context.c game.c makext.mk mapeditor.html maps.h minunit.h models.h models.txt nonstd.h npc.c termbox2.h vectordb.c vectordb.h
llama.cpp/ggml/src/ggml-hexagon/htp/main.c raw
   1#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
   2#pragma clang diagnostic ignored "-Wunused-function"
   3
   4#include <HAP_farf.h>
   5#include <HAP_perf.h>
   6#include <AEEStdErr.h>
   7#include <dspqueue.h>
   8#include <HAP_compute_res.h>
   9#include <HAP_etm_config.h>
  10#include <HAP_mem.h>
  11#include <HAP_power.h>
  12#include <HAP_ps.h>
  13#include <qurt.h>
  14#include <qurt_thread.h>
  15#include <remote.h>
  16#include <string.h>
  17
  18#include "hex-dma.h"
  19#include "hex-utils.h"
  20
  21#define GGML_COMMON_DECL_C
  22#include "ggml-common.h"
  23#include "htp-ctx.h"
  24#include "htp-msg.h"
  25#include "htp-ops.h"
  26#include "worker-pool.h"
  27
  28AEEResult htp_iface_open(const char * uri, remote_handle64 * handle) {
  29    struct htp_context * ctx;
  30    int                  err = 0;
  31
  32    ctx = calloc(1, sizeof(*ctx));
  33    if (ctx == NULL) {
  34        return AEE_ENOMEMORY;
  35    }
  36
  37    // Use the context structure as a handle
  38    *handle = (remote_handle64) ctx;
  39
  40    // Enable FARF logs
  41    HAP_setFARFRuntimeLoggingParams(0xffff, NULL, 0);
  42
  43    // Set client class
  44    {
  45        HAP_power_request_t request;
  46        memset(&request, 0, sizeof(HAP_power_request_t));
  47        request.type    = HAP_power_set_apptype;
  48        request.apptype = HAP_POWER_COMPUTE_CLIENT_CLASS;
  49
  50        if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
  51            return err;
  52        }
  53    }
  54
  55    {
  56        HAP_power_request_t request;
  57        memset(&request, 0, sizeof(request));
  58
  59        request.type                              = HAP_power_set_DCVS_v3;
  60        request.dcvs_v3.set_dcvs_enable           = TRUE;
  61        request.dcvs_v3.dcvs_enable               = TRUE;
  62        request.dcvs_v3.dcvs_option               = HAP_DCVS_V2_PERFORMANCE_MODE;
  63        request.dcvs_v3.set_bus_params            = TRUE;
  64        request.dcvs_v3.bus_params.min_corner     = HAP_DCVS_VCORNER_MAX;
  65        request.dcvs_v3.bus_params.max_corner     = HAP_DCVS_VCORNER_MAX;
  66        request.dcvs_v3.bus_params.target_corner  = HAP_DCVS_VCORNER_MAX;
  67        request.dcvs_v3.set_core_params           = TRUE;
  68        request.dcvs_v3.core_params.min_corner    = HAP_DCVS_VCORNER_MAX;
  69        request.dcvs_v3.core_params.max_corner    = HAP_DCVS_VCORNER_MAX;
  70        request.dcvs_v3.core_params.target_corner = HAP_DCVS_VCORNER_MAX;
  71        request.dcvs_v3.set_sleep_disable         = TRUE;
  72        request.dcvs_v3.sleep_disable             = TRUE;
  73        if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
  74            return err;
  75        }
  76
  77        memset(&request, 0, sizeof(request));
  78        request.type         = HAP_power_set_HVX;
  79        request.hvx.power_up = TRUE;
  80        if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
  81            return err;
  82        }
  83    }
  84
  85    {
  86        // Power on HMX
  87        HAP_power_request_t request;
  88        memset(&request, 0, sizeof(HAP_power_request_t));
  89        request.type         = HAP_power_set_HMX;
  90        request.hmx.power_up = TRUE;
  91        FARF(ALWAYS, "Powering HMX on\n");
  92        err = HAP_power_set((void *) &ctx, &request);
  93        if (err != AEE_SUCCESS) {
  94            FARF(ERROR, "Error powering on HMX.");
  95            return err;
  96        }
  97    }
  98
  99    return AEE_SUCCESS;
 100}
 101
 102AEEResult htp_iface_close(remote_handle64 handle) {
 103    struct htp_context * ctx = (struct htp_context *) handle;
 104
 105    if (!ctx) {
 106        return AEE_EBADPARM;
 107    }
 108
 109    if (ctx->queue) {
 110        FARF(ERROR, "Closing handle with queue still open");
 111        return AEE_EITEMBUSY;
 112    }
 113
 114    free(ctx);
 115    return AEE_SUCCESS;
 116}
 117
 118AEEResult htp_iface_enable_etm(remote_handle64 handle) {
 119    int err = HAP_user_etm_enable();
 120    if (err) {
 121        if (err == AEE_EVERSIONNOTSUPPORT) {
 122            FARF(ERROR, "API HAP_user_etm_enable is not supported\n");
 123        } else {
 124            FARF(ERROR, "Error executing HAP_user_etm_enable with error code : 0x%x\n", err);
 125        }
 126    }
 127    return err;
 128}
 129
 130AEEResult htp_iface_disable_etm(remote_handle64 handle) {
 131    int err = HAP_user_etm_disable();
 132    if (err) {
 133        if (err == AEE_EVERSIONNOTSUPPORT) {
 134            FARF(ERROR, "API HAP_user_etm_disable is not supported\n");
 135        } else {
 136            FARF(ERROR, "Error executing HAP_user_etm_disable with error code : 0x%x\n", err);
 137        }
 138    }
 139    return err;
 140}
 141
 142static int vtcm_acquire(struct htp_context * ctx) {
 143    int err;
 144    if (!ctx->vtcm_valid) {
 145        // Temporarily bump thread priority to make sure it's higher than other sessions.
 146        // This way the resource manager will notify the other thread to release VTCM.
 147        // Note that we need to reaquire VTCM at normal priority for this to work next time.
 148        qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10);
 149        err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
 150        if (err != 0) {
 151            FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
 152            abort();
 153        }
 154        HAP_compute_res_release_cached(ctx->vtcm_rctx);
 155        qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio);
 156
 157        err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
 158        if (err != 0) {
 159            FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
 160            abort();
 161        }
 162        ctx->vtcm_valid = true;
 163    }
 164
 165    ctx->vtcm_inuse = true;
 166    return 0;
 167}
 168
 169static int vtcm_release(struct htp_context * ctx) {
 170    ctx->vtcm_inuse = false;
 171
 172    if (ctx->vtcm_valid && ctx->vtcm_needs_release) {
 173        ctx->vtcm_valid         = false;
 174        ctx->vtcm_needs_release = false;
 175        HAP_compute_res_release_cached(ctx->vtcm_rctx);
 176    }
 177
 178    return 0;
 179}
 180
 181static int vtcm_release_callback(unsigned int rctx, void * state) {
 182    struct htp_context * ctx = (struct htp_context *) state;
 183
 184    if (!ctx || ctx->vtcm_rctx != rctx) {
 185        return AEE_EBADPARM;
 186    }
 187
 188    // If VTCM is not inuse (not processing Ops) release it right here
 189    // otherwise we'll release it once we're done with the current Op.
 190
 191    if (ctx->vtcm_inuse) {
 192        ctx->vtcm_needs_release = false;
 193        return 0;
 194    }
 195
 196    ctx->vtcm_valid = false;
 197    HAP_compute_res_release_cached(ctx->vtcm_rctx);
 198
 199    return 0;
 200}
 201
 202static int vtcm_alloc(struct htp_context * ctx) {
 203    unsigned int vtcm_size = 8 * 1024 * 1024;  // 8MB default
 204    HAP_compute_res_query_VTCM(0, &vtcm_size, NULL, NULL, NULL);
 205
 206    compute_res_attr_t attr;
 207    HAP_compute_res_attr_init(&attr);
 208    HAP_compute_res_attr_set_serialize(&attr, 0);
 209    HAP_compute_res_attr_set_cache_mode(&attr, 1);
 210    HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size);
 211    HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx);
 212    HAP_compute_res_attr_set_hmx_param(&attr, 1);
 213
 214    // Allocate VTCM for scratch pads
 215    uint32_t rctx = HAP_compute_res_acquire(&attr, 1000000 /* timeout */);
 216    if (!rctx) {
 217        FARF(ERROR, "failed to allocate %zu bytes VTCM\n", ctx->vtcm_size);
 218        return AEE_ENOMEMORY;
 219    }
 220
 221    void * vtcm_ptr;
 222    if (HAP_compute_res_attr_get_vtcm_ptr_v2(&attr, &vtcm_ptr, &vtcm_size) != 0) {
 223        HAP_compute_res_release(rctx);
 224        FARF(ERROR, "failed to allocate %zu bytes VTCM (new)\n", ctx->vtcm_size);
 225        return AEE_ENOMEMORY;
 226    }
 227
 228    ctx->vtcm_base          = (uint8_t *) vtcm_ptr;
 229    ctx->vtcm_size          = vtcm_size;
 230    ctx->vtcm_rctx          = rctx;
 231    ctx->vtcm_valid         = false;
 232    ctx->vtcm_inuse         = false;
 233    ctx->vtcm_needs_release = false;
 234
 235    return 0;
 236}
 237
 238static void vtcm_free(struct htp_context * ctx) {
 239    if (ctx->vtcm_rctx) {
 240        HAP_compute_res_release(ctx->vtcm_rctx);
 241        ctx->vtcm_base = 0;
 242        ctx->vtcm_rctx = 0;
 243    }
 244}
 245
 246static void htp_packet_callback(dspqueue_t queue, int error, void * context);
 247static void htp_error_callback(dspqueue_t queue, int error, void * context);
 248
 249AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx) {
 250    struct htp_context * ctx = (struct htp_context *) handle;
 251
 252    if (!ctx) {
 253        return AEE_EBADPARM;
 254    }
 255
 256    if (ctx->queue) {
 257        FARF(ERROR, "Queue already open");
 258        return AEE_EITEMBUSY;
 259    }
 260
 261    // Import queue created on the CPU
 262    int err = dspqueue_import(dsp_queue_id,         // Queue ID from dspqueue_export
 263                              htp_packet_callback,  // Packet callback
 264                              htp_error_callback,   // Error callback; no errors expected on the DSP
 265                              (void *) ctx,         // Callback context
 266                              &ctx->queue);
 267
 268    if (err) {
 269        FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
 270        return err;
 271    }
 272
 273    ctx->thread_id   = qurt_thread_get_id();
 274    ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
 275
 276    // allocate VTCM
 277    err = vtcm_alloc(ctx);
 278    if (err != AEE_SUCCESS) {
 279        FARF(ERROR, "Unable to allocate VTCM");
 280        return AEE_ENOMEMORY;
 281    }
 282
 283    qurt_sysenv_max_hthreads_t hw_threads;
 284    qurt_sysenv_get_max_hw_threads(&hw_threads);
 285    uint32_t hw_nhvx = (qurt_hvx_get_units() >> 8) & 0xFF;
 286
 287    if (n_hvx == 0) {
 288        n_hvx = hw_nhvx;
 289    }
 290    if (n_hvx > hw_threads.max_hthreads) {
 291        n_hvx = hw_threads.max_hthreads;
 292    }
 293    if (n_hvx > HTP_MAX_NTHREADS) {
 294        n_hvx = HTP_MAX_NTHREADS;
 295    }
 296
 297    ctx->n_threads = n_hvx;
 298    for (int i = 0; i < ctx->n_threads; i++) {
 299        // see discussion https://github.com/ggml-org/llama.cpp/pull/18151#discussion_r2632388541
 300        ctx->dma[i] = dma_queue_create(64);
 301    }
 302
 303    // init worker pool
 304    err = worker_pool_init(&ctx->worker_pool, n_hvx);
 305    if (err != AEE_SUCCESS) {
 306        FARF(ERROR, "Unable to create worker pool");
 307        return err;
 308    }
 309
 310    FARF(HIGH, "session %u started: n-hvx %u vtcm-size %zu vtcm-rctx %u n-threads %u thread-id %d thread-prio %d \n",
 311         sess_id, hw_nhvx, ctx->vtcm_size, ctx->vtcm_rctx, ctx->n_threads, ctx->thread_id, ctx->thread_prio);
 312
 313    return AEE_SUCCESS;
 314}
 315
 316AEEResult htp_iface_stop(remote_handle64 handle) {
 317    struct htp_context * ctx = (struct htp_context *) handle;
 318    if (!ctx) {
 319        return AEE_EBADPARM;
 320    }
 321
 322    if (!ctx->queue) {
 323        FARF(ERROR, "Queue not open");
 324        return AEE_EBADSTATE;
 325    }
 326
 327    // Close queue. dspqueue_close() will also wait for callbacks to finish.
 328    int err    = dspqueue_close(ctx->queue);
 329    ctx->queue = NULL;
 330    if (err != 0) {
 331        FARF(ERROR, "Queue close failed with 0x%08x", (unsigned) err);
 332        return err;
 333    }
 334
 335    if (ctx->worker_pool) {
 336        // Release worker pool
 337        worker_pool_release(&ctx->worker_pool);
 338    }
 339
 340    for (int i = 0; i < ctx->n_threads; i++) {
 341        dma_queue_delete(ctx->dma[i]);
 342    }
 343
 344    vtcm_free(ctx);
 345
 346    return AEE_SUCCESS;
 347}
 348
 349static void htp_error_callback(dspqueue_t queue, int error, void * context) {
 350    // No errors expected on the DSP.
 351    FARF(ERROR, "Error callback: 0x%08x", (unsigned) error);
 352}
 353
// Profiling counters captured around a single op.
// profile_start() stores the raw start samples in these fields and
// profile_stop() overwrites them with the elapsed values.
struct profile_data {
    uint64_t usecs;   // qtimer count at start; elapsed microseconds after profile_stop()
    uint64_t cycles;  // hex_get_cycles() at start; difference after profile_stop()
    uint64_t pkts;    // hex_get_pktcnt() at start; difference after profile_stop()
};
 359
 360static inline void profile_start(struct profile_data * d) {
 361    d->usecs  = HAP_perf_get_qtimer_count();
 362    d->cycles = hex_get_cycles();
 363    d->pkts   = hex_get_pktcnt();
 364}
 365
 366static inline void profile_stop(struct profile_data * d) {
 367    d->usecs  = HAP_perf_qtimer_count_to_us(HAP_perf_get_qtimer_count() - d->usecs);
 368    d->cycles = hex_get_cycles() - d->cycles;
 369    d->pkts   = hex_get_pktcnt() - d->pkts;
 370}
 371
 372static int send_htp_rsp(struct htp_context *     c,
 373                        uint32_t                 op,
 374                        uint32_t                 status,
 375                        struct dspqueue_buffer * bufs,
 376                        size_t                   n_bufs,
 377                        struct profile_data *    prof) {
 378    // Prep response struct
 379    struct htp_general_rsp rsp;
 380    rsp.op          = op;
 381    rsp.status      = status;
 382    rsp.prof_usecs  = prof->usecs;
 383    rsp.prof_cycles = prof->cycles;
 384    rsp.prof_pkts   = prof->pkts;
 385
 386    int err = dspqueue_write(c->queue,
 387                             0,                       // Flags
 388                             n_bufs,
 389                             bufs,                    // Buffer references
 390                             sizeof(rsp),
 391                             (const uint8_t *) &rsp,  // Message
 392                             DSPQUEUE_TIMEOUT_NONE);
 393
 394    if (err != 0) {
 395        FARF(ERROR, "dspqueue_write failed: 0x%08x", (unsigned) err);
 396    }
 397
 398    return err;
 399}
 400
 401static void proc_matmul_req(struct htp_context *     ctx,
 402                            struct htp_general_req * req,
 403                            struct dspqueue_buffer * bufs,
 404                            size_t                   n_bufs) {
 405    struct dspqueue_buffer rsp_bufs[1];
 406
 407    // We had written to the output buffer, we'd also need to flush it
 408    rsp_bufs[0].fd     = bufs[2].fd;
 409    rsp_bufs[0].ptr    = bufs[2].ptr;
 410    rsp_bufs[0].size   = bufs[2].size;
 411    rsp_bufs[0].offset = bufs[2].offset;
 412    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 413                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 414
 415    // Setup Op context
 416    struct htp_ops_context octx = { 0 };
 417    octx.ctx                    = ctx;
 418    octx.src0                   = req->src0;
 419    octx.src1                   = req->src1;
 420    octx.dst                    = req->dst;
 421    octx.flags                  = req->flags;
 422    octx.op                     = req->op;
 423
 424    // Update data pointers
 425    octx.src0.data = (uint32_t) bufs[0].ptr;
 426    octx.src1.data = (uint32_t) bufs[1].ptr;
 427    octx.dst.data  = (uint32_t) bufs[2].ptr;
 428    octx.n_threads = ctx->n_threads;
 429
 430    struct profile_data prof;
 431    profile_start(&prof);
 432
 433    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 434    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 435        rsp_status = op_matmul(&octx);
 436        vtcm_release(ctx);
 437    }
 438
 439    profile_stop(&prof);
 440    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 441}
 442
 443static void proc_argsort_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 444    struct dspqueue_buffer rsp_bufs[1];
 445
 446    // We had written to the output buffer, we'd also need to flush it
 447    rsp_bufs[0].fd     = bufs[1].fd;
 448    rsp_bufs[0].ptr    = bufs[1].ptr;
 449    rsp_bufs[0].offset = bufs[1].offset;
 450    rsp_bufs[0].size   = bufs[1].size;
 451    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 452                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 453
 454    // Setup Op context
 455    struct htp_ops_context octx = { 0 };
 456    octx.ctx                    = ctx;
 457    octx.src0                   = req->src0;
 458    octx.dst                    = req->dst;
 459    octx.flags                  = req->flags;
 460    octx.op                     = req->op;
 461
 462    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 463
 464    // Update data pointers
 465    octx.src0.data = (uint32_t) bufs[0].ptr;
 466    octx.dst.data  = (uint32_t) bufs[1].ptr;
 467    octx.n_threads = ctx->n_threads;
 468
 469    struct profile_data prof;
 470    profile_start(&prof);
 471
 472    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 473    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 474        rsp_status = op_argsort(&octx);
 475        vtcm_release(ctx);
 476    }
 477
 478    profile_stop(&prof);
 479    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 480}
 481
 482static void proc_cpy_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 483    struct dspqueue_buffer rsp_bufs[1];
 484
 485    // We had written to the output buffer, we'd also need to flush it
 486    rsp_bufs[0].fd     = bufs[1].fd;
 487    rsp_bufs[0].ptr    = bufs[1].ptr;
 488    rsp_bufs[0].offset = bufs[1].offset;
 489    rsp_bufs[0].size   = bufs[1].size;
 490    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 491                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 492
 493    // Setup Op context
 494    struct htp_ops_context octx = { 0 };
 495    octx.ctx                    = ctx;
 496    octx.src0                   = req->src0;
 497    octx.dst                    = req->dst;
 498    octx.flags                  = req->flags;
 499    octx.op                     = req->op;
 500
 501    // Update data pointers
 502    octx.src0.data = (uint32_t) bufs[0].ptr;
 503    octx.dst.data  = (uint32_t) bufs[1].ptr;
 504    octx.n_threads = ctx->n_threads;
 505
 506    struct profile_data prof;
 507    profile_start(&prof);
 508
 509    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 510    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 511        rsp_status = op_cpy(&octx);
 512        vtcm_release(ctx);
 513    }
 514
 515    profile_stop(&prof);
 516    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 517}
 518
 519static void proc_get_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 520    struct dspqueue_buffer rsp_bufs[1];
 521
 522    // We had written to the output buffer, we'd also need to flush it
 523    rsp_bufs[0].fd     = bufs[2].fd;
 524    rsp_bufs[0].ptr    = bufs[2].ptr;
 525    rsp_bufs[0].offset = bufs[2].offset;
 526    rsp_bufs[0].size   = bufs[2].size;
 527    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 528                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 529
 530    // Setup Op context
 531    struct htp_ops_context octx = { 0 };
 532    octx.ctx                    = ctx;
 533    octx.src0                   = req->src0;
 534    octx.src1                   = req->src1;
 535    octx.dst                    = req->dst;
 536    octx.flags                  = req->flags;
 537    octx.op                     = req->op;
 538
 539    // Update data pointers
 540    octx.src0.data = (uint32_t) bufs[0].ptr;
 541    octx.src1.data = (uint32_t) bufs[1].ptr;
 542    octx.dst.data  = (uint32_t) bufs[2].ptr;
 543    octx.n_threads = ctx->n_threads;
 544
 545    struct profile_data prof;
 546    profile_start(&prof);
 547
 548    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 549    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 550        rsp_status = op_get_rows(&octx);
 551        vtcm_release(ctx);
 552    }
 553
 554    profile_stop(&prof);
 555    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 556}
 557
 558static void proc_matmul_id_req(struct htp_context *     ctx,
 559                               struct htp_general_req * req,
 560                               struct dspqueue_buffer * bufs,
 561                               size_t                   n_bufs) {
 562    struct dspqueue_buffer rsp_bufs[1];
 563
 564    // We had written to the output buffer, we'd also need to flush it
 565    rsp_bufs[0].fd     = bufs[3].fd;
 566    rsp_bufs[0].ptr    = bufs[3].ptr;
 567    rsp_bufs[0].size   = bufs[3].size;
 568    rsp_bufs[0].offset = bufs[3].offset;
 569    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 570                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 571
 572    // Setup Op context
 573    struct htp_ops_context octx = { 0 };
 574    octx.ctx                    = ctx;
 575    octx.src0                   = req->src0;
 576    octx.src1                   = req->src1;
 577    octx.src2                   = req->src2;
 578    octx.dst                    = req->dst;
 579    octx.flags                  = req->flags;
 580    octx.op                     = req->op;
 581
 582    // Update data pointers
 583    octx.src0.data = (uint32_t) bufs[0].ptr;
 584    octx.src1.data = (uint32_t) bufs[1].ptr;
 585    octx.src2.data = (uint32_t) bufs[2].ptr;
 586    octx.dst.data  = (uint32_t) bufs[3].ptr;
 587    octx.n_threads = ctx->n_threads;
 588
 589    struct profile_data prof;
 590    profile_start(&prof);
 591
 592    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 593    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 594        rsp_status = op_matmul_id(&octx);
 595        vtcm_release(ctx);
 596    }
 597
 598    profile_stop(&prof);
 599    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 600}
 601
 602static void proc_binary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 603    struct dspqueue_buffer rsp_bufs[1];
 604
 605    // We had written to the output buffer, we'd also need to flush it
 606    rsp_bufs[0].fd     = bufs[2].fd;
 607    rsp_bufs[0].ptr    = bufs[2].ptr;
 608    rsp_bufs[0].offset = bufs[2].offset;
 609    rsp_bufs[0].size   = bufs[2].size;
 610    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 611                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 612
 613    // Setup Op context
 614    struct htp_ops_context octx = { 0 };
 615    octx.ctx                    = ctx;
 616    octx.src0                   = req->src0;
 617    octx.src1                   = req->src1;
 618    octx.dst                    = req->dst;
 619    octx.flags                  = req->flags;
 620    octx.op                     = req->op;
 621
 622    // Update data pointers
 623    octx.src0.data = (uint32_t) bufs[0].ptr;
 624    octx.src1.data = (uint32_t) bufs[1].ptr;
 625    octx.dst.data  = (uint32_t) bufs[2].ptr;
 626    octx.n_threads = ctx->n_threads;
 627
 628    struct profile_data prof;
 629    profile_start(&prof);
 630
 631    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 632    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 633        rsp_status = op_binary(&octx);
 634        vtcm_release(ctx);
 635    }
 636
 637    profile_stop(&prof);
 638    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 639}
 640
 641static void proc_add_id_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 642    struct dspqueue_buffer rsp_bufs[1];
 643
 644    // We had written to the output buffer, we'd also need to flush it
 645    rsp_bufs[0].fd     = bufs[3].fd;
 646    rsp_bufs[0].ptr    = bufs[3].ptr;
 647    rsp_bufs[0].offset = bufs[3].offset;
 648    rsp_bufs[0].size   = bufs[3].size;
 649    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 650                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 651
 652    // Setup Op context
 653    struct htp_ops_context octx = { 0 };
 654    octx.ctx                    = ctx;
 655    octx.src0                   = req->src0;
 656    octx.src1                   = req->src1;
 657    octx.src2                   = req->src2;
 658    octx.dst                    = req->dst;
 659    octx.flags                  = req->flags;
 660    octx.op                     = req->op;
 661
 662    // Update data pointers
 663    octx.src0.data = (uint32_t) bufs[0].ptr;
 664    octx.src1.data = (uint32_t) bufs[1].ptr;
 665    octx.src2.data = (uint32_t) bufs[2].ptr;
 666    octx.dst.data  = (uint32_t) bufs[3].ptr;
 667    octx.n_threads = ctx->n_threads;
 668
 669    struct profile_data prof;
 670    profile_start(&prof);
 671
 672    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 673    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 674        rsp_status = op_binary(&octx);
 675        vtcm_release(ctx);
 676    }
 677
 678    profile_stop(&prof);
 679    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 680}
 681
 682static void proc_unary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 683    struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
 684
 685    // We had written to the output buffer, we'd also need to flush it
 686    rsp_bufs[0].fd     = bufs[1].fd;
 687    rsp_bufs[0].ptr    = bufs[1].ptr;
 688    rsp_bufs[0].offset = bufs[1].offset;
 689    rsp_bufs[0].size   = bufs[1].size;
 690    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 691                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 692
 693    // Setup Op context
 694    struct htp_ops_context octx = { 0 };
 695    octx.ctx                    = ctx;
 696    octx.src0                   = req->src0;
 697    octx.dst                    = req->dst;
 698    octx.flags                  = req->flags;
 699    octx.op                     = req->op;
 700
 701    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 702
 703    // Update data pointers
 704    octx.src0.data = (uint32_t) bufs[0].ptr;
 705    octx.dst.data  = (uint32_t) bufs[1].ptr;
 706    octx.n_threads = ctx->n_threads;
 707
 708    struct profile_data prof;
 709    profile_start(&prof);
 710
 711    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 712    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 713        rsp_status = op_unary(&octx);
 714        vtcm_release(ctx);
 715    }
 716
 717    profile_stop(&prof);
 718    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 719}
 720
 721static void proc_sum_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 722    struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
 723
 724    // We had written to the output buffer, we'd also need to flush it
 725    rsp_bufs[0].fd     = bufs[1].fd;
 726    rsp_bufs[0].ptr    = bufs[1].ptr;
 727    rsp_bufs[0].offset = bufs[1].offset;
 728    rsp_bufs[0].size   = bufs[1].size;
 729    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 730                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 731
 732    // Setup Op context
 733    struct htp_ops_context octx = { 0 };
 734    octx.ctx                    = ctx;
 735    octx.src0                   = req->src0;
 736    octx.dst                    = req->dst;
 737    octx.flags                  = req->flags;
 738    octx.op                     = req->op;
 739
 740    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 741
 742    // Update data pointers
 743    octx.src0.data = (uint32_t) bufs[0].ptr;
 744    octx.dst.data  = (uint32_t) bufs[1].ptr;
 745    octx.n_threads = ctx->n_threads;
 746
 747    struct profile_data prof;
 748    profile_start(&prof);
 749
 750    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 751    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 752        rsp_status = op_sum_rows(&octx);
 753        vtcm_release(ctx);
 754    }
 755
 756    profile_stop(&prof);
 757    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 758}
 759
 760static void proc_activations_req(struct htp_context *     ctx,
 761                                 struct htp_general_req * req,
 762                                 struct dspqueue_buffer * bufs,
 763                                 uint32_t                 n_bufs) {
 764    struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
 765
 766    int write_idx = (n_bufs == 3) ? 2 : 1;
 767
 768    // We had written to the output buffer, we'd also need to flush it
 769    rsp_bufs[0].fd     = bufs[write_idx].fd;
 770    rsp_bufs[0].ptr    = bufs[write_idx].ptr;
 771    rsp_bufs[0].offset = bufs[write_idx].offset;
 772    rsp_bufs[0].size   = bufs[write_idx].size;
 773    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 774                          DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
 775
 776    // Setup Op context
 777    struct htp_ops_context octx = { 0 };
 778    octx.ctx                    = ctx;
 779    octx.src0                   = req->src0;
 780    if (3 == n_bufs) {
 781        octx.src1 = req->src1;
 782    }
 783    octx.dst   = req->dst;
 784    octx.flags = req->flags;
 785    octx.op    = req->op;
 786
 787    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 788
 789    // Update data pointers
 790    octx.src0.data = (uint32_t) bufs[0].ptr;
 791    if (3 == n_bufs) {
 792        octx.src1.data = (uint32_t) bufs[1].ptr;
 793        octx.dst.data  = (uint32_t) bufs[2].ptr;
 794    } else {
 795        octx.dst.data = (uint32_t) bufs[1].ptr;
 796    }
 797    octx.n_threads = ctx->n_threads;
 798
 799    struct profile_data prof;
 800    profile_start(&prof);
 801
 802    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 803    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 804        if (octx.op == HTP_OP_SOFTMAX) {
 805            rsp_status = op_softmax(&octx);
 806        } else {
 807            rsp_status = op_activations(&octx);
 808        }
 809        vtcm_release(ctx);
 810    }
 811
 812    profile_stop(&prof);
 813    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 814}
 815
 816static void proc_rope_req(struct htp_context *     ctx,
 817                          struct htp_general_req * req,
 818                          struct dspqueue_buffer * bufs,
 819                          uint32_t                 n_bufs) {
 820    struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
 821
 822    int write_idx = n_bufs - 1;
 823
 824    // We had written to the output buffer, we'd also need to flush it
 825    rsp_bufs[0].fd     = bufs[write_idx].fd;
 826    rsp_bufs[0].ptr    = bufs[write_idx].ptr;
 827    rsp_bufs[0].offset = bufs[write_idx].offset;
 828    rsp_bufs[0].size   = bufs[write_idx].size;
 829    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 830                          DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
 831
 832    // Setup Op context
 833    struct htp_ops_context octx = { 0 };
 834    octx.ctx                    = ctx;
 835    octx.src0                   = req->src0;
 836    octx.src1                   = req->src1;
 837    if (4 == n_bufs) {
 838        octx.src2 = req->src2;
 839    }
 840    octx.dst   = req->dst;
 841    octx.flags = req->flags;
 842    octx.op    = req->op;
 843
 844    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 845
 846    // Update data pointers
 847    octx.src0.data = (uint32_t) bufs[0].ptr;
 848    octx.src1.data = (uint32_t) bufs[1].ptr;
 849    if (4 == n_bufs) {
 850        octx.src2.data = (uint32_t) bufs[2].ptr;
 851        octx.dst.data  = (uint32_t) bufs[3].ptr;
 852    } else {
 853        octx.dst.data = (uint32_t) bufs[2].ptr;
 854    }
 855    octx.n_threads = ctx->n_threads;
 856
 857    struct profile_data prof;
 858    profile_start(&prof);
 859
 860    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 861    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 862        rsp_status = op_rope(&octx);
 863        vtcm_release(ctx);
 864    }
 865
 866    profile_stop(&prof);
 867    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 868}
 869
 870static void proc_set_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
 871    struct dspqueue_buffer rsp_bufs[1];
 872
 873    // We had written to the output buffer, we'd also need to flush it
 874    rsp_bufs[0].fd     = bufs[2].fd;
 875    rsp_bufs[0].ptr    = bufs[2].ptr;
 876    rsp_bufs[0].offset = bufs[2].offset;
 877    rsp_bufs[0].size   = bufs[2].size;
 878    rsp_bufs[0].flags  = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 879                         DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT);  // Invalidate CPU
 880
 881    // Setup Op context
 882    struct htp_ops_context octx = { 0 };
 883    octx.ctx                    = ctx;
 884    octx.src0                   = req->src0;
 885    octx.src1                   = req->src1;
 886    octx.dst                    = req->dst;
 887    octx.flags                  = req->flags;
 888    octx.op                     = req->op;
 889
 890    // Update data pointers
 891    octx.src0.data = (uint32_t) bufs[0].ptr;
 892    octx.src1.data = (uint32_t) bufs[1].ptr;
 893    octx.dst.data  = (uint32_t) bufs[2].ptr;
 894    octx.n_threads = ctx->n_threads;
 895
 896    struct profile_data prof;
 897    profile_start(&prof);
 898
 899    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 900    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 901        rsp_status = op_set_rows(&octx);
 902        vtcm_release(ctx);
 903    }
 904
 905    profile_stop(&prof);
 906    send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
 907}
 908
 909static void proc_flash_attn_ext_req(struct htp_context *     ctx,
 910                                    struct htp_general_req * req,
 911                                    struct dspqueue_buffer * bufs,
 912                                    uint32_t                 n_bufs) {
 913    // Setup Op context
 914    struct htp_ops_context octx;
 915    memset(&octx, 0, sizeof(octx));
 916
 917    octx.ctx   = ctx;
 918    octx.n_threads = ctx->n_threads;
 919
 920    octx.src0  = req->src0;
 921    octx.src1  = req->src1;
 922    octx.src2  = req->src2;
 923    octx.src3  = req->src3;
 924    octx.src4  = req->src4;
 925    octx.dst   = req->dst;
 926    octx.flags = req->flags;
 927    octx.op    = req->op;
 928
 929    memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
 930
 931    // Update data pointers
 932    octx.src0.data = (uint32_t) bufs[0].ptr;
 933    octx.src1.data = (uint32_t) bufs[1].ptr;
 934    octx.src2.data = (uint32_t) bufs[2].ptr;
 935
 936    int last_buf = 3;
 937
 938    if (octx.src3.ne[0]) {
 939        octx.src3.data = (uint32_t) bufs[last_buf++].ptr; // mask is valid
 940    }
 941
 942    if (octx.src4.ne[0]) {
 943        octx.src4.data = (uint32_t) bufs[last_buf++].ptr; // sinks is valid
 944    }
 945
 946    octx.dst.data = (uint32_t) bufs[last_buf].ptr;
 947
 948    struct profile_data prof;
 949    profile_start(&prof);
 950
 951    uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
 952    if (vtcm_acquire(ctx) == AEE_SUCCESS) {
 953        rsp_status = op_flash_attn_ext(&octx);
 954        vtcm_release(ctx);
 955    }
 956
 957    profile_stop(&prof);
 958
 959    struct dspqueue_buffer rsp_buf = bufs[last_buf];
 960    rsp_buf.flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER |         // Flush HTP
 961                     DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
 962
 963    send_htp_rsp(ctx, req->op, rsp_status, &bufs[last_buf], 1, &prof);
 964}
 965
 966static void htp_packet_callback(dspqueue_t queue, int error, void * context) {
 967    struct htp_context * ctx = (struct htp_context *) context;
 968
 969    // Repeatedly read packets from the queue until it's empty. We don't
 970    // necessarily get a separate callback for each packet, and new packets
 971    // may arrive while we're processing the previous one. This ensures we
 972    // keep the DSP busy as much as possible and avoid waiting for the CPU.
 973
 974    while (1) {
 975        struct htp_general_req req;
 976        uint32_t               req_size;
 977
 978        struct dspqueue_buffer bufs[HTP_MAX_PACKET_BUFFERS];
 979        uint32_t               n_bufs;
 980        uint32_t               flags;
 981
 982        // Read packet from queue
 983        int err = dspqueue_read_noblock(queue, &flags,
 984                                        HTP_MAX_PACKET_BUFFERS,  // Maximum number of buffer references
 985                                        &n_bufs,                 // Number of buffer references
 986                                        bufs,                    // Buffer references
 987                                        sizeof(req),             // Max message length
 988                                        &req_size,               // Message length
 989                                        (uint8_t *) &req);       // Message
 990
 991        if (err == AEE_EWOULDBLOCK) {
 992            // Consumed all packets available for now
 993            return;
 994        }
 995
 996        if (err != 0) {
 997            FARF(ERROR, "dspqueue_read_noblock failed: 0x%08x", (unsigned) err);
 998            return;
 999        }
1000
1001        if (req_size != sizeof(req)) {
1002            FARF(ERROR, "Invalid request size");
1003            continue;
1004        }
1005
1006        if (req.flags & HTP_OPFLAGS_EARLY_WAKEUP) {
1007            // Host wants early notification
1008            dspqueue_write_early_wakeup_noblock(ctx->queue, 10, 0);
1009        }
1010
1011        // Process packet based on its message type
1012        switch (req.op) {
1013            case HTP_OP_MUL_MAT:
1014                if (n_bufs != 3) {
1015                    FARF(ERROR, "Bad matmul-req buffer list");
1016                    continue;
1017                }
1018                proc_matmul_req(ctx, &req, bufs, n_bufs);
1019                break;
1020
1021            case HTP_OP_MUL_MAT_ID:
1022                if (n_bufs != 4) {
1023                    FARF(ERROR, "Bad matmul-id-req buffer list");
1024                    continue;
1025                }
1026                proc_matmul_id_req(ctx, &req, bufs, n_bufs);
1027                break;
1028
1029            case HTP_OP_MUL:
1030            case HTP_OP_ADD:
1031            case HTP_OP_SUB:
1032            case HTP_OP_DIV:
1033                if (n_bufs != 3) {
1034                    FARF(ERROR, "Bad binary-req buffer list");
1035                    continue;
1036                }
1037                proc_binary_req(ctx, &req, bufs);
1038                break;
1039
1040            case HTP_OP_RMS_NORM:
1041            case HTP_OP_SCALE:
1042                if (n_bufs != 2) {
1043                    FARF(ERROR, "Bad unary-req buffer list");
1044                    continue;
1045                }
1046
1047                proc_unary_req(ctx, &req, bufs);
1048                break;
1049
1050            case HTP_OP_SQR:
1051            case HTP_OP_SQRT:
1052                if (n_bufs != 2) {
1053                    FARF(ERROR, "Bad unary-req buffer list");
1054                    continue;
1055                }
1056
1057                proc_unary_req(ctx, &req, bufs);
1058                break;
1059
1060            case HTP_OP_SUM_ROWS:
1061                if (n_bufs != 2) {
1062                    FARF(ERROR, "Bad unary-req buffer list");
1063                    continue;
1064                }
1065
1066                proc_sum_rows_req(ctx, &req, bufs);
1067                break;
1068
1069            case HTP_OP_UNARY_SILU:
1070            case HTP_OP_UNARY_GELU:
1071                if (n_bufs != 2) {
1072                    FARF(ERROR, "Bad act-req buffer list");
1073                    continue;
1074                }
1075                proc_activations_req(ctx, &req, bufs, n_bufs);
1076                break;
1077
1078            case HTP_OP_GLU_SWIGLU:
1079            case HTP_OP_GLU_SWIGLU_OAI:
1080            case HTP_OP_SOFTMAX:
1081            case HTP_OP_GLU_GEGLU:
1082                if ((n_bufs != 2) && (n_bufs != 3)) {
1083                    FARF(ERROR, "Bad act-req buffer list");
1084                    continue;
1085                }
1086                proc_activations_req(ctx, &req, bufs, n_bufs);
1087                break;
1088
1089            case HTP_OP_ADD_ID:
1090                if (n_bufs != 4) {
1091                    FARF(ERROR, "Bad add-id-req buffer list");
1092                    continue;
1093                }
1094                proc_add_id_req(ctx, &req, bufs);
1095                break;
1096
1097            case HTP_OP_ROPE:
1098                if ((n_bufs != 3) && (n_bufs != 4)) {
1099                    FARF(ERROR, "Bad rope-req buffer list");
1100                    continue;
1101                }
1102                proc_rope_req(ctx, &req, bufs, n_bufs);
1103                break;
1104
1105            case HTP_OP_FLASH_ATTN_EXT:
1106                if (!(n_bufs >= 4 && n_bufs <= 6)) {
1107                    FARF(ERROR, "Bad flash-attn-ext-req buffer list");
1108                    continue;
1109                }
1110                proc_flash_attn_ext_req(ctx, &req, bufs, n_bufs);
1111                break;
1112
1113            case HTP_OP_SET_ROWS:
1114                if (n_bufs != 3) {
1115                    FARF(ERROR, "Bad set-rows-req buffer list");
1116                    continue;
1117                }
1118                proc_set_rows_req(ctx, &req, bufs);
1119                break;
1120
1121            case HTP_OP_GET_ROWS:
1122                if (n_bufs != 3) {
1123                    FARF(ERROR, "Bad get-rows-req buffer list");
1124                    continue;
1125                }
1126                proc_get_rows_req(ctx, &req, bufs);
1127                break;
1128
1129            case HTP_OP_CPY:
1130                if (n_bufs != 2) {
1131                    FARF(ERROR, "Bad cpy-req buffer list");
1132                    continue;
1133                }
1134                proc_cpy_req(ctx, &req, bufs);
1135                break;
1136
1137            case HTP_OP_ARGSORT:
1138                if (n_bufs != 2) {
1139                    FARF(ERROR, "Bad argsort-req buffer list");
1140                    continue;
1141                }
1142                proc_argsort_req(ctx, &req, bufs);
1143                break;
1144
1145            default:
1146                FARF(ERROR, "Unknown Op %u", req.op);
1147                break;
1148        }
1149    }
1150}