llama.cpp
.devops
nix
apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix.github
ISSUE_TEMPLATE
010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.ymlworkflows
bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.ymlbenches
cmake
arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmakecommon
jinja
README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.hdocs
multimodal
MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.mdops
BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csvexamples
llama.android
app
src
lib
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.promodel-conversion
scripts
causal
compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.pyembedding
compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.pyutils
__init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.pysycl
CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.batggml
include
ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.hsrc
ggml-cann
CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cppggml-cpu
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.hggml-cuda
template-instances
fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu 
fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cuggml-hexagon
htp
CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.hggml-metal
CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metalggml-opencl
kernels
add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.clggml-sycl
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hppggml-virtgpu
backend
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cppggml-vulkan
vulkan-shaders
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp 
opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.compggml-webgpu
wgsl-shaders
argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgslgguf-py
gguf
scripts
gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.pygrammars
README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnfmedia
llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svgmodels
templates
Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinjarequirements
requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txtscripts
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmakesrc
models
afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpptests
peg-parser
simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.htools
cvector-generator
CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txtmtmd
legacy-models
convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.pymodels
cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cppserver
public_legacy
colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.csspublic_simplechat
datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjstests
unit
test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.pywebui
.storybook
ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.tssrc
lib
components
app
chat
ChatAttachments
ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelteChatForm
ChatFormActions
ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelteChatMessages
ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelteChatScreen
ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.sveltedialogs
DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.sveltemisc
ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelteui
alert-dialog
alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.tscard
card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.tsdialog
dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.tsdropdown-menu
dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.tspopover
index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelteselect
index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.sveltesheet
index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.sveltesidebar
constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.sveltetable
index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelteconstants
auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.tsstores
chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.tsutils
api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.tstests
llama.cpp/ggml/src/ggml-hexagon/htp/main.c
raw
1#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
2#pragma clang diagnostic ignored "-Wunused-function"
3
4#include <HAP_farf.h>
5#include <HAP_perf.h>
6#include <AEEStdErr.h>
7#include <dspqueue.h>
8#include <HAP_compute_res.h>
9#include <HAP_etm_config.h>
10#include <HAP_mem.h>
11#include <HAP_power.h>
12#include <HAP_ps.h>
13#include <qurt.h>
14#include <qurt_thread.h>
15#include <remote.h>
16#include <string.h>
17
18#include "hex-dma.h"
19#include "hex-utils.h"
20
21#define GGML_COMMON_DECL_C
22#include "ggml-common.h"
23#include "htp-ctx.h"
24#include "htp-msg.h"
25#include "htp-ops.h"
26#include "worker-pool.h"
27
28AEEResult htp_iface_open(const char * uri, remote_handle64 * handle) {
29 struct htp_context * ctx;
30 int err = 0;
31
32 ctx = calloc(1, sizeof(*ctx));
33 if (ctx == NULL) {
34 return AEE_ENOMEMORY;
35 }
36
37 // Use the context structure as a handle
38 *handle = (remote_handle64) ctx;
39
40 // Enable FARF logs
41 HAP_setFARFRuntimeLoggingParams(0xffff, NULL, 0);
42
43 // Set client class
44 {
45 HAP_power_request_t request;
46 memset(&request, 0, sizeof(HAP_power_request_t));
47 request.type = HAP_power_set_apptype;
48 request.apptype = HAP_POWER_COMPUTE_CLIENT_CLASS;
49
50 if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
51 return err;
52 }
53 }
54
55 {
56 HAP_power_request_t request;
57 memset(&request, 0, sizeof(request));
58
59 request.type = HAP_power_set_DCVS_v3;
60 request.dcvs_v3.set_dcvs_enable = TRUE;
61 request.dcvs_v3.dcvs_enable = TRUE;
62 request.dcvs_v3.dcvs_option = HAP_DCVS_V2_PERFORMANCE_MODE;
63 request.dcvs_v3.set_bus_params = TRUE;
64 request.dcvs_v3.bus_params.min_corner = HAP_DCVS_VCORNER_MAX;
65 request.dcvs_v3.bus_params.max_corner = HAP_DCVS_VCORNER_MAX;
66 request.dcvs_v3.bus_params.target_corner = HAP_DCVS_VCORNER_MAX;
67 request.dcvs_v3.set_core_params = TRUE;
68 request.dcvs_v3.core_params.min_corner = HAP_DCVS_VCORNER_MAX;
69 request.dcvs_v3.core_params.max_corner = HAP_DCVS_VCORNER_MAX;
70 request.dcvs_v3.core_params.target_corner = HAP_DCVS_VCORNER_MAX;
71 request.dcvs_v3.set_sleep_disable = TRUE;
72 request.dcvs_v3.sleep_disable = TRUE;
73 if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
74 return err;
75 }
76
77 memset(&request, 0, sizeof(request));
78 request.type = HAP_power_set_HVX;
79 request.hvx.power_up = TRUE;
80 if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
81 return err;
82 }
83 }
84
85 {
86 // Power on HMX
87 HAP_power_request_t request;
88 memset(&request, 0, sizeof(HAP_power_request_t));
89 request.type = HAP_power_set_HMX;
90 request.hmx.power_up = TRUE;
91 FARF(ALWAYS, "Powering HMX on\n");
92 err = HAP_power_set((void *) &ctx, &request);
93 if (err != AEE_SUCCESS) {
94 FARF(ERROR, "Error powering on HMX.");
95 return err;
96 }
97 }
98
99 return AEE_SUCCESS;
100}
101
102AEEResult htp_iface_close(remote_handle64 handle) {
103 struct htp_context * ctx = (struct htp_context *) handle;
104
105 if (!ctx) {
106 return AEE_EBADPARM;
107 }
108
109 if (ctx->queue) {
110 FARF(ERROR, "Closing handle with queue still open");
111 return AEE_EITEMBUSY;
112 }
113
114 free(ctx);
115 return AEE_SUCCESS;
116}
117
118AEEResult htp_iface_enable_etm(remote_handle64 handle) {
119 int err = HAP_user_etm_enable();
120 if (err) {
121 if (err == AEE_EVERSIONNOTSUPPORT) {
122 FARF(ERROR, "API HAP_user_etm_enable is not supported\n");
123 } else {
124 FARF(ERROR, "Error executing HAP_user_etm_enable with error code : 0x%x\n", err);
125 }
126 }
127 return err;
128}
129
130AEEResult htp_iface_disable_etm(remote_handle64 handle) {
131 int err = HAP_user_etm_disable();
132 if (err) {
133 if (err == AEE_EVERSIONNOTSUPPORT) {
134 FARF(ERROR, "API HAP_user_etm_disable is not supported\n");
135 } else {
136 FARF(ERROR, "Error executing HAP_user_etm_disable with error code : 0x%x\n", err);
137 }
138 }
139 return err;
140}
141
// (Re)acquire the cached VTCM resource before processing an Op and mark it
// in use. The acquire/release/reacquire dance below is order-critical; do
// not reorder these calls. Aborts the process if VTCM cannot be obtained
// within the 1s timeout. Always returns 0.
static int vtcm_acquire(struct htp_context * ctx) {
    int err;
    if (!ctx->vtcm_valid) {
        // Temporarily bump thread priority to make sure it's higher than other sessions.
        // This way the resource manager will notify the other thread to release VTCM.
        // Note that we need to reacquire VTCM at normal priority for this to work next time.
        qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10);
        err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
        if (err != 0) {
            FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
            abort();
        }
        // Drop the elevated-priority acquisition and redo it at normal
        // priority so the next contended acquire can preempt us in turn.
        HAP_compute_res_release_cached(ctx->vtcm_rctx);
        qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio);

        err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
        if (err != 0) {
            FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
            abort();
        }
        ctx->vtcm_valid = true;
    }

    // Mark VTCM busy so the release callback defers instead of yanking it
    // out from under the running Op.
    ctx->vtcm_inuse = true;
    return 0;
}
168
169static int vtcm_release(struct htp_context * ctx) {
170 ctx->vtcm_inuse = false;
171
172 if (ctx->vtcm_valid && ctx->vtcm_needs_release) {
173 ctx->vtcm_valid = false;
174 ctx->vtcm_needs_release = false;
175 HAP_compute_res_release_cached(ctx->vtcm_rctx);
176 }
177
178 return 0;
179}
180
181static int vtcm_release_callback(unsigned int rctx, void * state) {
182 struct htp_context * ctx = (struct htp_context *) state;
183
184 if (!ctx || ctx->vtcm_rctx != rctx) {
185 return AEE_EBADPARM;
186 }
187
188 // If VTCM is not inuse (not processing Ops) release it right here
189 // otherwise we'll release it once we're done with the current Op.
190
191 if (ctx->vtcm_inuse) {
192 ctx->vtcm_needs_release = false;
193 return 0;
194 }
195
196 ctx->vtcm_valid = false;
197 HAP_compute_res_release_cached(ctx->vtcm_rctx);
198
199 return 0;
200}
201
202static int vtcm_alloc(struct htp_context * ctx) {
203 unsigned int vtcm_size = 8 * 1024 * 1024; // 8MB default
204 HAP_compute_res_query_VTCM(0, &vtcm_size, NULL, NULL, NULL);
205
206 compute_res_attr_t attr;
207 HAP_compute_res_attr_init(&attr);
208 HAP_compute_res_attr_set_serialize(&attr, 0);
209 HAP_compute_res_attr_set_cache_mode(&attr, 1);
210 HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size);
211 HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx);
212 HAP_compute_res_attr_set_hmx_param(&attr, 1);
213
214 // Allocate VTCM for scratch pads
215 uint32_t rctx = HAP_compute_res_acquire(&attr, 1000000 /* timeout */);
216 if (!rctx) {
217 FARF(ERROR, "failed to allocate %zu bytes VTCM\n", ctx->vtcm_size);
218 return AEE_ENOMEMORY;
219 }
220
221 void * vtcm_ptr;
222 if (HAP_compute_res_attr_get_vtcm_ptr_v2(&attr, &vtcm_ptr, &vtcm_size) != 0) {
223 HAP_compute_res_release(rctx);
224 FARF(ERROR, "failed to allocate %zu bytes VTCM (new)\n", ctx->vtcm_size);
225 return AEE_ENOMEMORY;
226 }
227
228 ctx->vtcm_base = (uint8_t *) vtcm_ptr;
229 ctx->vtcm_size = vtcm_size;
230 ctx->vtcm_rctx = rctx;
231 ctx->vtcm_valid = false;
232 ctx->vtcm_inuse = false;
233 ctx->vtcm_needs_release = false;
234
235 return 0;
236}
237
238static void vtcm_free(struct htp_context * ctx) {
239 if (ctx->vtcm_rctx) {
240 HAP_compute_res_release(ctx->vtcm_rctx);
241 ctx->vtcm_base = 0;
242 ctx->vtcm_rctx = 0;
243 }
244}
245
246static void htp_packet_callback(dspqueue_t queue, int error, void * context);
247static void htp_error_callback(dspqueue_t queue, int error, void * context);
248
249AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx) {
250 struct htp_context * ctx = (struct htp_context *) handle;
251
252 if (!ctx) {
253 return AEE_EBADPARM;
254 }
255
256 if (ctx->queue) {
257 FARF(ERROR, "Queue already open");
258 return AEE_EITEMBUSY;
259 }
260
261 // Import queue created on the CPU
262 int err = dspqueue_import(dsp_queue_id, // Queue ID from dspqueue_export
263 htp_packet_callback, // Packet callback
264 htp_error_callback, // Error callback; no errors expected on the DSP
265 (void *) ctx, // Callback context
266 &ctx->queue);
267
268 if (err) {
269 FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
270 return err;
271 }
272
273 ctx->thread_id = qurt_thread_get_id();
274 ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
275
276 // allocate VTCM
277 err = vtcm_alloc(ctx);
278 if (err != AEE_SUCCESS) {
279 FARF(ERROR, "Unable to allocate VTCM");
280 return AEE_ENOMEMORY;
281 }
282
283 qurt_sysenv_max_hthreads_t hw_threads;
284 qurt_sysenv_get_max_hw_threads(&hw_threads);
285 uint32_t hw_nhvx = (qurt_hvx_get_units() >> 8) & 0xFF;
286
287 if (n_hvx == 0) {
288 n_hvx = hw_nhvx;
289 }
290 if (n_hvx > hw_threads.max_hthreads) {
291 n_hvx = hw_threads.max_hthreads;
292 }
293 if (n_hvx > HTP_MAX_NTHREADS) {
294 n_hvx = HTP_MAX_NTHREADS;
295 }
296
297 ctx->n_threads = n_hvx;
298 for (int i = 0; i < ctx->n_threads; i++) {
299 // see discussion https://github.com/ggml-org/llama.cpp/pull/18151#discussion_r2632388541
300 ctx->dma[i] = dma_queue_create(64);
301 }
302
303 // init worker pool
304 err = worker_pool_init(&ctx->worker_pool, n_hvx);
305 if (err != AEE_SUCCESS) {
306 FARF(ERROR, "Unable to create worker pool");
307 return err;
308 }
309
310 FARF(HIGH, "session %u started: n-hvx %u vtcm-size %zu vtcm-rctx %u n-threads %u thread-id %d thread-prio %d \n",
311 sess_id, hw_nhvx, ctx->vtcm_size, ctx->vtcm_rctx, ctx->n_threads, ctx->thread_id, ctx->thread_prio);
312
313 return AEE_SUCCESS;
314}
315
316AEEResult htp_iface_stop(remote_handle64 handle) {
317 struct htp_context * ctx = (struct htp_context *) handle;
318 if (!ctx) {
319 return AEE_EBADPARM;
320 }
321
322 if (!ctx->queue) {
323 FARF(ERROR, "Queue not open");
324 return AEE_EBADSTATE;
325 }
326
327 // Close queue. dspqueue_close() will also wait for callbacks to finish.
328 int err = dspqueue_close(ctx->queue);
329 ctx->queue = NULL;
330 if (err != 0) {
331 FARF(ERROR, "Queue close failed with 0x%08x", (unsigned) err);
332 return err;
333 }
334
335 if (ctx->worker_pool) {
336 // Release worker pool
337 worker_pool_release(&ctx->worker_pool);
338 }
339
340 for (int i = 0; i < ctx->n_threads; i++) {
341 dma_queue_delete(ctx->dma[i]);
342 }
343
344 vtcm_free(ctx);
345
346 return AEE_SUCCESS;
347}
348
349static void htp_error_callback(dspqueue_t queue, int error, void * context) {
350 // No errors expected on the DSP.
351 FARF(ERROR, "Error callback: 0x%08x", (unsigned) error);
352}
353
// Per-request profiling counters. profile_start() stores raw counter
// snapshots; profile_stop() converts each field in place into a delta
// (with usecs additionally converted from qtimer ticks to microseconds).
struct profile_data {
    uint64_t usecs;  // qtimer snapshot at start; elapsed usecs after stop
    uint64_t cycles; // core cycle counter snapshot; cycle delta after stop
    uint64_t pkts;   // packet counter snapshot; packet delta after stop
};
359
// Snapshot the qtimer / cycle / packet counters into *d.
// Note: d->usecs holds raw qtimer ticks until profile_stop() converts it.
static inline void profile_start(struct profile_data * d) {
    d->usecs = HAP_perf_get_qtimer_count();
    d->cycles = hex_get_cycles();
    d->pkts = hex_get_pktcnt();
}
365
// Convert the snapshot taken by profile_start() into elapsed values:
// microseconds, cycles and packets executed since the start call.
static inline void profile_stop(struct profile_data * d) {
    d->usecs = HAP_perf_qtimer_count_to_us(HAP_perf_get_qtimer_count() - d->usecs);
    d->cycles = hex_get_cycles() - d->cycles;
    d->pkts = hex_get_pktcnt() - d->pkts;
}
371
372static int send_htp_rsp(struct htp_context * c,
373 uint32_t op,
374 uint32_t status,
375 struct dspqueue_buffer * bufs,
376 size_t n_bufs,
377 struct profile_data * prof) {
378 // Prep response struct
379 struct htp_general_rsp rsp;
380 rsp.op = op;
381 rsp.status = status;
382 rsp.prof_usecs = prof->usecs;
383 rsp.prof_cycles = prof->cycles;
384 rsp.prof_pkts = prof->pkts;
385
386 int err = dspqueue_write(c->queue,
387 0, // Flags
388 n_bufs,
389 bufs, // Buffer references
390 sizeof(rsp),
391 (const uint8_t *) &rsp, // Message
392 DSPQUEUE_TIMEOUT_NONE);
393
394 if (err != 0) {
395 FARF(ERROR, "dspqueue_write failed: 0x%08x", (unsigned) err);
396 }
397
398 return err;
399}
400
401static void proc_matmul_req(struct htp_context * ctx,
402 struct htp_general_req * req,
403 struct dspqueue_buffer * bufs,
404 size_t n_bufs) {
405 struct dspqueue_buffer rsp_bufs[1];
406
407 // We had written to the output buffer, we'd also need to flush it
408 rsp_bufs[0].fd = bufs[2].fd;
409 rsp_bufs[0].ptr = bufs[2].ptr;
410 rsp_bufs[0].size = bufs[2].size;
411 rsp_bufs[0].offset = bufs[2].offset;
412 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
413 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
414
415 // Setup Op context
416 struct htp_ops_context octx = { 0 };
417 octx.ctx = ctx;
418 octx.src0 = req->src0;
419 octx.src1 = req->src1;
420 octx.dst = req->dst;
421 octx.flags = req->flags;
422 octx.op = req->op;
423
424 // Update data pointers
425 octx.src0.data = (uint32_t) bufs[0].ptr;
426 octx.src1.data = (uint32_t) bufs[1].ptr;
427 octx.dst.data = (uint32_t) bufs[2].ptr;
428 octx.n_threads = ctx->n_threads;
429
430 struct profile_data prof;
431 profile_start(&prof);
432
433 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
434 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
435 rsp_status = op_matmul(&octx);
436 vtcm_release(ctx);
437 }
438
439 profile_stop(&prof);
440 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
441}
442
443static void proc_argsort_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
444 struct dspqueue_buffer rsp_bufs[1];
445
446 // We had written to the output buffer, we'd also need to flush it
447 rsp_bufs[0].fd = bufs[1].fd;
448 rsp_bufs[0].ptr = bufs[1].ptr;
449 rsp_bufs[0].offset = bufs[1].offset;
450 rsp_bufs[0].size = bufs[1].size;
451 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
452 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
453
454 // Setup Op context
455 struct htp_ops_context octx = { 0 };
456 octx.ctx = ctx;
457 octx.src0 = req->src0;
458 octx.dst = req->dst;
459 octx.flags = req->flags;
460 octx.op = req->op;
461
462 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
463
464 // Update data pointers
465 octx.src0.data = (uint32_t) bufs[0].ptr;
466 octx.dst.data = (uint32_t) bufs[1].ptr;
467 octx.n_threads = ctx->n_threads;
468
469 struct profile_data prof;
470 profile_start(&prof);
471
472 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
473 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
474 rsp_status = op_argsort(&octx);
475 vtcm_release(ctx);
476 }
477
478 profile_stop(&prof);
479 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
480}
481
482static void proc_cpy_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
483 struct dspqueue_buffer rsp_bufs[1];
484
485 // We had written to the output buffer, we'd also need to flush it
486 rsp_bufs[0].fd = bufs[1].fd;
487 rsp_bufs[0].ptr = bufs[1].ptr;
488 rsp_bufs[0].offset = bufs[1].offset;
489 rsp_bufs[0].size = bufs[1].size;
490 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
491 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
492
493 // Setup Op context
494 struct htp_ops_context octx = { 0 };
495 octx.ctx = ctx;
496 octx.src0 = req->src0;
497 octx.dst = req->dst;
498 octx.flags = req->flags;
499 octx.op = req->op;
500
501 // Update data pointers
502 octx.src0.data = (uint32_t) bufs[0].ptr;
503 octx.dst.data = (uint32_t) bufs[1].ptr;
504 octx.n_threads = ctx->n_threads;
505
506 struct profile_data prof;
507 profile_start(&prof);
508
509 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
510 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
511 rsp_status = op_cpy(&octx);
512 vtcm_release(ctx);
513 }
514
515 profile_stop(&prof);
516 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
517}
518
519static void proc_get_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
520 struct dspqueue_buffer rsp_bufs[1];
521
522 // We had written to the output buffer, we'd also need to flush it
523 rsp_bufs[0].fd = bufs[2].fd;
524 rsp_bufs[0].ptr = bufs[2].ptr;
525 rsp_bufs[0].offset = bufs[2].offset;
526 rsp_bufs[0].size = bufs[2].size;
527 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
528 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
529
530 // Setup Op context
531 struct htp_ops_context octx = { 0 };
532 octx.ctx = ctx;
533 octx.src0 = req->src0;
534 octx.src1 = req->src1;
535 octx.dst = req->dst;
536 octx.flags = req->flags;
537 octx.op = req->op;
538
539 // Update data pointers
540 octx.src0.data = (uint32_t) bufs[0].ptr;
541 octx.src1.data = (uint32_t) bufs[1].ptr;
542 octx.dst.data = (uint32_t) bufs[2].ptr;
543 octx.n_threads = ctx->n_threads;
544
545 struct profile_data prof;
546 profile_start(&prof);
547
548 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
549 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
550 rsp_status = op_get_rows(&octx);
551 vtcm_release(ctx);
552 }
553
554 profile_stop(&prof);
555 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
556}
557
558static void proc_matmul_id_req(struct htp_context * ctx,
559 struct htp_general_req * req,
560 struct dspqueue_buffer * bufs,
561 size_t n_bufs) {
562 struct dspqueue_buffer rsp_bufs[1];
563
564 // We had written to the output buffer, we'd also need to flush it
565 rsp_bufs[0].fd = bufs[3].fd;
566 rsp_bufs[0].ptr = bufs[3].ptr;
567 rsp_bufs[0].size = bufs[3].size;
568 rsp_bufs[0].offset = bufs[3].offset;
569 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
570 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
571
572 // Setup Op context
573 struct htp_ops_context octx = { 0 };
574 octx.ctx = ctx;
575 octx.src0 = req->src0;
576 octx.src1 = req->src1;
577 octx.src2 = req->src2;
578 octx.dst = req->dst;
579 octx.flags = req->flags;
580 octx.op = req->op;
581
582 // Update data pointers
583 octx.src0.data = (uint32_t) bufs[0].ptr;
584 octx.src1.data = (uint32_t) bufs[1].ptr;
585 octx.src2.data = (uint32_t) bufs[2].ptr;
586 octx.dst.data = (uint32_t) bufs[3].ptr;
587 octx.n_threads = ctx->n_threads;
588
589 struct profile_data prof;
590 profile_start(&prof);
591
592 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
593 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
594 rsp_status = op_matmul_id(&octx);
595 vtcm_release(ctx);
596 }
597
598 profile_stop(&prof);
599 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
600}
601
602static void proc_binary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
603 struct dspqueue_buffer rsp_bufs[1];
604
605 // We had written to the output buffer, we'd also need to flush it
606 rsp_bufs[0].fd = bufs[2].fd;
607 rsp_bufs[0].ptr = bufs[2].ptr;
608 rsp_bufs[0].offset = bufs[2].offset;
609 rsp_bufs[0].size = bufs[2].size;
610 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
611 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
612
613 // Setup Op context
614 struct htp_ops_context octx = { 0 };
615 octx.ctx = ctx;
616 octx.src0 = req->src0;
617 octx.src1 = req->src1;
618 octx.dst = req->dst;
619 octx.flags = req->flags;
620 octx.op = req->op;
621
622 // Update data pointers
623 octx.src0.data = (uint32_t) bufs[0].ptr;
624 octx.src1.data = (uint32_t) bufs[1].ptr;
625 octx.dst.data = (uint32_t) bufs[2].ptr;
626 octx.n_threads = ctx->n_threads;
627
628 struct profile_data prof;
629 profile_start(&prof);
630
631 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
632 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
633 rsp_status = op_binary(&octx);
634 vtcm_release(ctx);
635 }
636
637 profile_stop(&prof);
638 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
639}
640
641static void proc_add_id_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
642 struct dspqueue_buffer rsp_bufs[1];
643
644 // We had written to the output buffer, we'd also need to flush it
645 rsp_bufs[0].fd = bufs[3].fd;
646 rsp_bufs[0].ptr = bufs[3].ptr;
647 rsp_bufs[0].offset = bufs[3].offset;
648 rsp_bufs[0].size = bufs[3].size;
649 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
650 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
651
652 // Setup Op context
653 struct htp_ops_context octx = { 0 };
654 octx.ctx = ctx;
655 octx.src0 = req->src0;
656 octx.src1 = req->src1;
657 octx.src2 = req->src2;
658 octx.dst = req->dst;
659 octx.flags = req->flags;
660 octx.op = req->op;
661
662 // Update data pointers
663 octx.src0.data = (uint32_t) bufs[0].ptr;
664 octx.src1.data = (uint32_t) bufs[1].ptr;
665 octx.src2.data = (uint32_t) bufs[2].ptr;
666 octx.dst.data = (uint32_t) bufs[3].ptr;
667 octx.n_threads = ctx->n_threads;
668
669 struct profile_data prof;
670 profile_start(&prof);
671
672 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
673 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
674 rsp_status = op_binary(&octx);
675 vtcm_release(ctx);
676 }
677
678 profile_stop(&prof);
679 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
680}
681
682static void proc_unary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
683 struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
684
685 // We had written to the output buffer, we'd also need to flush it
686 rsp_bufs[0].fd = bufs[1].fd;
687 rsp_bufs[0].ptr = bufs[1].ptr;
688 rsp_bufs[0].offset = bufs[1].offset;
689 rsp_bufs[0].size = bufs[1].size;
690 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
691 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
692
693 // Setup Op context
694 struct htp_ops_context octx = { 0 };
695 octx.ctx = ctx;
696 octx.src0 = req->src0;
697 octx.dst = req->dst;
698 octx.flags = req->flags;
699 octx.op = req->op;
700
701 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
702
703 // Update data pointers
704 octx.src0.data = (uint32_t) bufs[0].ptr;
705 octx.dst.data = (uint32_t) bufs[1].ptr;
706 octx.n_threads = ctx->n_threads;
707
708 struct profile_data prof;
709 profile_start(&prof);
710
711 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
712 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
713 rsp_status = op_unary(&octx);
714 vtcm_release(ctx);
715 }
716
717 profile_stop(&prof);
718 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
719}
720
721static void proc_sum_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
722 struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
723
724 // We had written to the output buffer, we'd also need to flush it
725 rsp_bufs[0].fd = bufs[1].fd;
726 rsp_bufs[0].ptr = bufs[1].ptr;
727 rsp_bufs[0].offset = bufs[1].offset;
728 rsp_bufs[0].size = bufs[1].size;
729 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
730 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
731
732 // Setup Op context
733 struct htp_ops_context octx = { 0 };
734 octx.ctx = ctx;
735 octx.src0 = req->src0;
736 octx.dst = req->dst;
737 octx.flags = req->flags;
738 octx.op = req->op;
739
740 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
741
742 // Update data pointers
743 octx.src0.data = (uint32_t) bufs[0].ptr;
744 octx.dst.data = (uint32_t) bufs[1].ptr;
745 octx.n_threads = ctx->n_threads;
746
747 struct profile_data prof;
748 profile_start(&prof);
749
750 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
751 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
752 rsp_status = op_sum_rows(&octx);
753 vtcm_release(ctx);
754 }
755
756 profile_stop(&prof);
757 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
758}
759
760static void proc_activations_req(struct htp_context * ctx,
761 struct htp_general_req * req,
762 struct dspqueue_buffer * bufs,
763 uint32_t n_bufs) {
764 struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
765
766 int write_idx = (n_bufs == 3) ? 2 : 1;
767
768 // We had written to the output buffer, we'd also need to flush it
769 rsp_bufs[0].fd = bufs[write_idx].fd;
770 rsp_bufs[0].ptr = bufs[write_idx].ptr;
771 rsp_bufs[0].offset = bufs[write_idx].offset;
772 rsp_bufs[0].size = bufs[write_idx].size;
773 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
774 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
775
776 // Setup Op context
777 struct htp_ops_context octx = { 0 };
778 octx.ctx = ctx;
779 octx.src0 = req->src0;
780 if (3 == n_bufs) {
781 octx.src1 = req->src1;
782 }
783 octx.dst = req->dst;
784 octx.flags = req->flags;
785 octx.op = req->op;
786
787 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
788
789 // Update data pointers
790 octx.src0.data = (uint32_t) bufs[0].ptr;
791 if (3 == n_bufs) {
792 octx.src1.data = (uint32_t) bufs[1].ptr;
793 octx.dst.data = (uint32_t) bufs[2].ptr;
794 } else {
795 octx.dst.data = (uint32_t) bufs[1].ptr;
796 }
797 octx.n_threads = ctx->n_threads;
798
799 struct profile_data prof;
800 profile_start(&prof);
801
802 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
803 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
804 if (octx.op == HTP_OP_SOFTMAX) {
805 rsp_status = op_softmax(&octx);
806 } else {
807 rsp_status = op_activations(&octx);
808 }
809 vtcm_release(ctx);
810 }
811
812 profile_stop(&prof);
813 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
814}
815
816static void proc_rope_req(struct htp_context * ctx,
817 struct htp_general_req * req,
818 struct dspqueue_buffer * bufs,
819 uint32_t n_bufs) {
820 struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
821
822 int write_idx = n_bufs - 1;
823
824 // We had written to the output buffer, we'd also need to flush it
825 rsp_bufs[0].fd = bufs[write_idx].fd;
826 rsp_bufs[0].ptr = bufs[write_idx].ptr;
827 rsp_bufs[0].offset = bufs[write_idx].offset;
828 rsp_bufs[0].size = bufs[write_idx].size;
829 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
830 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
831
832 // Setup Op context
833 struct htp_ops_context octx = { 0 };
834 octx.ctx = ctx;
835 octx.src0 = req->src0;
836 octx.src1 = req->src1;
837 if (4 == n_bufs) {
838 octx.src2 = req->src2;
839 }
840 octx.dst = req->dst;
841 octx.flags = req->flags;
842 octx.op = req->op;
843
844 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
845
846 // Update data pointers
847 octx.src0.data = (uint32_t) bufs[0].ptr;
848 octx.src1.data = (uint32_t) bufs[1].ptr;
849 if (4 == n_bufs) {
850 octx.src2.data = (uint32_t) bufs[2].ptr;
851 octx.dst.data = (uint32_t) bufs[3].ptr;
852 } else {
853 octx.dst.data = (uint32_t) bufs[2].ptr;
854 }
855 octx.n_threads = ctx->n_threads;
856
857 struct profile_data prof;
858 profile_start(&prof);
859
860 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
861 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
862 rsp_status = op_rope(&octx);
863 vtcm_release(ctx);
864 }
865
866 profile_stop(&prof);
867 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
868}
869
870static void proc_set_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
871 struct dspqueue_buffer rsp_bufs[1];
872
873 // We had written to the output buffer, we'd also need to flush it
874 rsp_bufs[0].fd = bufs[2].fd;
875 rsp_bufs[0].ptr = bufs[2].ptr;
876 rsp_bufs[0].offset = bufs[2].offset;
877 rsp_bufs[0].size = bufs[2].size;
878 rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
879 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
880
881 // Setup Op context
882 struct htp_ops_context octx = { 0 };
883 octx.ctx = ctx;
884 octx.src0 = req->src0;
885 octx.src1 = req->src1;
886 octx.dst = req->dst;
887 octx.flags = req->flags;
888 octx.op = req->op;
889
890 // Update data pointers
891 octx.src0.data = (uint32_t) bufs[0].ptr;
892 octx.src1.data = (uint32_t) bufs[1].ptr;
893 octx.dst.data = (uint32_t) bufs[2].ptr;
894 octx.n_threads = ctx->n_threads;
895
896 struct profile_data prof;
897 profile_start(&prof);
898
899 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
900 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
901 rsp_status = op_set_rows(&octx);
902 vtcm_release(ctx);
903 }
904
905 profile_stop(&prof);
906 send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
907}
908
909static void proc_flash_attn_ext_req(struct htp_context * ctx,
910 struct htp_general_req * req,
911 struct dspqueue_buffer * bufs,
912 uint32_t n_bufs) {
913 // Setup Op context
914 struct htp_ops_context octx;
915 memset(&octx, 0, sizeof(octx));
916
917 octx.ctx = ctx;
918 octx.n_threads = ctx->n_threads;
919
920 octx.src0 = req->src0;
921 octx.src1 = req->src1;
922 octx.src2 = req->src2;
923 octx.src3 = req->src3;
924 octx.src4 = req->src4;
925 octx.dst = req->dst;
926 octx.flags = req->flags;
927 octx.op = req->op;
928
929 memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
930
931 // Update data pointers
932 octx.src0.data = (uint32_t) bufs[0].ptr;
933 octx.src1.data = (uint32_t) bufs[1].ptr;
934 octx.src2.data = (uint32_t) bufs[2].ptr;
935
936 int last_buf = 3;
937
938 if (octx.src3.ne[0]) {
939 octx.src3.data = (uint32_t) bufs[last_buf++].ptr; // mask is valid
940 }
941
942 if (octx.src4.ne[0]) {
943 octx.src4.data = (uint32_t) bufs[last_buf++].ptr; // sinks is valid
944 }
945
946 octx.dst.data = (uint32_t) bufs[last_buf].ptr;
947
948 struct profile_data prof;
949 profile_start(&prof);
950
951 uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
952 if (vtcm_acquire(ctx) == AEE_SUCCESS) {
953 rsp_status = op_flash_attn_ext(&octx);
954 vtcm_release(ctx);
955 }
956
957 profile_stop(&prof);
958
959 struct dspqueue_buffer rsp_buf = bufs[last_buf];
960 rsp_buf.flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
961 DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
962
963 send_htp_rsp(ctx, req->op, rsp_status, &bufs[last_buf], 1, &prof);
964}
965
// Packet callback for the imported DSP queue: drains every pending request
// packet and dispatches it to the matching proc_*_req handler.
// The 'error' argument is unused here; queue errors arrive via
// htp_error_callback instead.
static void htp_packet_callback(dspqueue_t queue, int error, void * context) {
    struct htp_context * ctx = (struct htp_context *) context;

    // Repeatedly read packets from the queue until it's empty. We don't
    // necessarily get a separate callback for each packet, and new packets
    // may arrive while we're processing the previous one. This ensures we
    // keep the DSP busy as much as possible and avoid waiting for the CPU.

    while (1) {
        struct htp_general_req req;
        uint32_t req_size;

        struct dspqueue_buffer bufs[HTP_MAX_PACKET_BUFFERS];
        uint32_t n_bufs;
        uint32_t flags;

        // Read packet from queue
        int err = dspqueue_read_noblock(queue, &flags,
                                        HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
                                        &n_bufs, // Number of buffer references
                                        bufs, // Buffer references
                                        sizeof(req), // Max message length
                                        &req_size, // Message length
                                        (uint8_t *) &req); // Message

        if (err == AEE_EWOULDBLOCK) {
            // Consumed all packets available for now
            return;
        }

        if (err != 0) {
            // Unexpected read failure: stop draining; remaining packets (if
            // any) will be retried on the next callback.
            FARF(ERROR, "dspqueue_read_noblock failed: 0x%08x", (unsigned) err);
            return;
        }

        if (req_size != sizeof(req)) {
            // Malformed request: drop this packet but keep draining.
            // NOTE(review): no response is sent for dropped packets —
            // presumably the host handles this via timeouts; confirm.
            FARF(ERROR, "Invalid request size");
            continue;
        }

        if (req.flags & HTP_OPFLAGS_EARLY_WAKEUP) {
            // Host wants early notification
            dspqueue_write_early_wakeup_noblock(ctx->queue, 10, 0);
        }

        // Process packet based on its message type.
        // Each case first validates the op-specific buffer count; on a
        // mismatch the packet is dropped (no response) and draining continues.
        switch (req.op) {
            case HTP_OP_MUL_MAT:
                if (n_bufs != 3) {
                    FARF(ERROR, "Bad matmul-req buffer list");
                    continue;
                }
                proc_matmul_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_MUL_MAT_ID:
                if (n_bufs != 4) {
                    FARF(ERROR, "Bad matmul-id-req buffer list");
                    continue;
                }
                proc_matmul_id_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_MUL:
            case HTP_OP_ADD:
            case HTP_OP_SUB:
            case HTP_OP_DIV:
                // Element-wise binary ops: two inputs plus one output buffer.
                if (n_bufs != 3) {
                    FARF(ERROR, "Bad binary-req buffer list");
                    continue;
                }
                proc_binary_req(ctx, &req, bufs);
                break;

            case HTP_OP_RMS_NORM:
            case HTP_OP_SCALE:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad unary-req buffer list");
                    continue;
                }

                proc_unary_req(ctx, &req, bufs);
                break;

            case HTP_OP_SQR:
            case HTP_OP_SQRT:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad unary-req buffer list");
                    continue;
                }

                proc_unary_req(ctx, &req, bufs);
                break;

            case HTP_OP_SUM_ROWS:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad unary-req buffer list");
                    continue;
                }

                proc_sum_rows_req(ctx, &req, bufs);
                break;

            case HTP_OP_UNARY_SILU:
            case HTP_OP_UNARY_GELU:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad act-req buffer list");
                    continue;
                }
                proc_activations_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_GLU_SWIGLU:
            case HTP_OP_GLU_SWIGLU_OAI:
            case HTP_OP_SOFTMAX:
            case HTP_OP_GLU_GEGLU:
                // These ops take either one or two inputs, so 2 or 3 buffers.
                if ((n_bufs != 2) && (n_bufs != 3)) {
                    FARF(ERROR, "Bad act-req buffer list");
                    continue;
                }
                proc_activations_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_ADD_ID:
                if (n_bufs != 4) {
                    FARF(ERROR, "Bad add-id-req buffer list");
                    continue;
                }
                proc_add_id_req(ctx, &req, bufs);
                break;

            case HTP_OP_ROPE:
                // Optional third input makes this 3 or 4 buffers.
                if ((n_bufs != 3) && (n_bufs != 4)) {
                    FARF(ERROR, "Bad rope-req buffer list");
                    continue;
                }
                proc_rope_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_FLASH_ATTN_EXT:
                // q/k/v/dst plus optional mask and sinks buffers: 4..6 total.
                if (!(n_bufs >= 4 && n_bufs <= 6)) {
                    FARF(ERROR, "Bad flash-attn-ext-req buffer list");
                    continue;
                }
                proc_flash_attn_ext_req(ctx, &req, bufs, n_bufs);
                break;

            case HTP_OP_SET_ROWS:
                if (n_bufs != 3) {
                    FARF(ERROR, "Bad set-rows-req buffer list");
                    continue;
                }
                proc_set_rows_req(ctx, &req, bufs);
                break;

            case HTP_OP_GET_ROWS:
                if (n_bufs != 3) {
                    FARF(ERROR, "Bad get-rows-req buffer list");
                    continue;
                }
                proc_get_rows_req(ctx, &req, bufs);
                break;

            case HTP_OP_CPY:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad cpy-req buffer list");
                    continue;
                }
                proc_cpy_req(ctx, &req, bufs);
                break;

            case HTP_OP_ARGSORT:
                if (n_bufs != 2) {
                    FARF(ERROR, "Bad argsort-req buffer list");
                    continue;
                }
                proc_argsort_req(ctx, &req, bufs);
                break;

            default:
                // Unknown op: log and keep draining the queue.
                FARF(ERROR, "Unknown Op %u", req.op);
                break;
        }
    }
}