Diffstat (limited to 'llama.cpp/scripts')
-rwxr-xr-x  llama.cpp/scripts/apple/validate-apps.sh                5
-rwxr-xr-x  llama.cpp/scripts/apple/validate-ios.sh               820
-rwxr-xr-x  llama.cpp/scripts/apple/validate-macos.sh             781
-rwxr-xr-x  llama.cpp/scripts/apple/validate-tvos.sh              813
-rwxr-xr-x  llama.cpp/scripts/apple/validate-visionos.sh          811
-rwxr-xr-x  llama.cpp/scripts/bench-models.sh                      82
-rwxr-xr-x  llama.cpp/scripts/build-info.sh                        30
-rwxr-xr-x  llama.cpp/scripts/check-requirements.sh               179
-rwxr-xr-x  llama.cpp/scripts/compare-commits.sh                   66
-rwxr-xr-x  llama.cpp/scripts/compare-llama-bench.py             1093
-rw-r--r--  llama.cpp/scripts/compare-logprobs.py                 281
-rwxr-xr-x  llama.cpp/scripts/create_ops_docs.py                  201
-rwxr-xr-x  llama.cpp/scripts/debug-test.sh                       202
-rwxr-xr-x  llama.cpp/scripts/fetch_server_test_models.py         105
-rwxr-xr-x  llama.cpp/scripts/gen-authors.sh                        9
-rw-r--r--  llama.cpp/scripts/gen-unicode-data.py                 196
-rw-r--r--  llama.cpp/scripts/get-flags.mk                         38
-rwxr-xr-x  llama.cpp/scripts/get-hellaswag.sh                     10
-rwxr-xr-x  llama.cpp/scripts/get-pg.sh                            70
-rwxr-xr-x  llama.cpp/scripts/get-wikitext-103.sh                  10
-rwxr-xr-x  llama.cpp/scripts/get-wikitext-2.sh                    11
-rwxr-xr-x  llama.cpp/scripts/get-winogrande.sh                    10
-rwxr-xr-x  llama.cpp/scripts/get_chat_template.py                 76
-rwxr-xr-x  llama.cpp/scripts/hf.sh                               112
-rw-r--r--  llama.cpp/scripts/install-oneapi.bat                   19
-rwxr-xr-x  llama.cpp/scripts/jinja/jinja-tester.py               504
-rw-r--r--  llama.cpp/scripts/jinja/requirements.txt                2
-rwxr-xr-x  llama.cpp/scripts/pr2wt.sh                             79
-rw-r--r--  llama.cpp/scripts/serve-static.js                     110
-rwxr-xr-x  llama.cpp/scripts/server-bench.py                     297
-rw-r--r--  llama.cpp/scripts/snapdragon/adb/llama-cli.farf         1
-rwxr-xr-x  llama.cpp/scripts/snapdragon/adb/run-bench.sh          52
-rwxr-xr-x  llama.cpp/scripts/snapdragon/adb/run-cli.sh            59
-rwxr-xr-x  llama.cpp/scripts/snapdragon/adb/run-completion.sh     59
-rwxr-xr-x  llama.cpp/scripts/snapdragon/adb/run-mtmd.sh           68
-rwxr-xr-x  llama.cpp/scripts/snapdragon/adb/run-tool.sh           54
-rw-r--r--  llama.cpp/scripts/snapdragon/qdc/readme.md              1
-rw-r--r--  llama.cpp/scripts/snapdragon/qdc/requirements.txt      25
-rw-r--r--  llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py   63
-rw-r--r--  llama.cpp/scripts/snapdragon/windows/run-bench.ps1     40
-rw-r--r--  llama.cpp/scripts/snapdragon/windows/run-cli.ps1       53
-rw-r--r--  llama.cpp/scripts/snapdragon/windows/run-tool.ps1      56
-rw-r--r--  llama.cpp/scripts/snapdragon/windows/setup-build.ps1  105
-rwxr-xr-x  llama.cpp/scripts/sync-ggml-am.sh                     158
-rw-r--r--  llama.cpp/scripts/sync-ggml.last                        1
-rwxr-xr-x  llama.cpp/scripts/sync-ggml.sh                         20
-rwxr-xr-x  llama.cpp/scripts/sync_vendor.py                       40
-rwxr-xr-x  llama.cpp/scripts/tool_bench.py                       379
-rwxr-xr-x  llama.cpp/scripts/tool_bench.sh                        66
-rwxr-xr-x  llama.cpp/scripts/verify-checksum-models.py            84
-rw-r--r--  llama.cpp/scripts/xxd.cmake                            16
51 files changed, 8422 insertions, 0 deletions
diff --git a/llama.cpp/scripts/apple/validate-apps.sh b/llama.cpp/scripts/apple/validate-apps.sh
new file mode 100755
index 0000000..f047575
--- /dev/null
+++ b/llama.cpp/scripts/apple/validate-apps.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+./scripts/apple/validate-ios.sh
+./scripts/apple/validate-macos.sh
+./scripts/apple/validate-visionos.sh
+./scripts/apple/validate-tvos.sh
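
For reference, a minimal way to drive these validators from the repository root is sketched below. It assumes the llama.xcframework has already been produced under build-apple/ (the build-xcframework.sh prerequisite is an assumption, not part of this diff), and the Apple ID credentials are optional placeholders:

    # sketch only: paths and credentials are assumptions/placeholders
    cd llama.cpp
    ./build-xcframework.sh                          # assumed prerequisite that creates build-apple/llama.xcframework
    export APPLE_ID="your.email@example.com"        # optional, enables altool authentication
    export APPLE_PASSWORD="app-specific-password"   # optional, app-specific password
    ./scripts/apple/validate-apps.sh                # runs the iOS, macOS, visionOS and tvOS checks in turn
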
diff --git a/llama.cpp/scripts/apple/validate-ios.sh b/llama.cpp/scripts/apple/validate-ios.sh
new file mode 100755
index 0000000..50800d8
--- /dev/null
+++ b/llama.cpp/scripts/apple/validate-ios.sh
@@ -0,0 +1,820 @@
+#!/usr/bin/env bash
+# validate-ios.sh - Validate iOS Application with embedded llama.xcframework using SwiftUI
+
+# Authentication options (optional; can be set via environment variables)
+# To use: export APPLE_ID=your.email@example.com
+# export APPLE_PASSWORD=your-app-specific-password
+# ./validate-ios.sh
+APPLE_ID=${APPLE_ID:-""}
+APPLE_PASSWORD=${APPLE_PASSWORD:-""}
+
+# Ensure the script exits on error
+set -e
+
+# Function to print usage instructions
+print_usage() {
+ echo "Usage: ./validate-ios.sh [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --help Show this help message"
+ echo " --apple-id EMAIL Apple ID email for validation"
+ echo " --apple-password PWD App-specific password for Apple ID"
+ echo ""
+ echo "Environment variables:"
+ echo " APPLE_ID Apple ID email for validation"
+ echo " APPLE_PASSWORD App-specific password for Apple ID"
+ echo ""
+ echo "Notes:"
+ echo " - Command line options take precedence over environment variables"
+ echo " - Authentication is optional. If not provided, alternative validation will be performed"
+ echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --help)
+ print_usage
+ exit 0
+ ;;
+ --apple-id)
+ APPLE_ID="$2"
+ shift 2
+ ;;
+ --apple-password)
+ APPLE_PASSWORD="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option: $1"
+ print_usage
+ exit 1
+ ;;
+ esac
+done
+
+# Function to clean up in case of error
+cleanup() {
+ # Don't clean up temp files on error to help with debugging
+ echo "===== iOS Validation Process Failed ====="
+ exit 1
+}
+
+# Set up trap to call cleanup function on error
+trap cleanup ERR
+
+set -e # Exit on any error
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/validation-builds/ios"
+
+# Configuration
+APP_NAME="iOSLlamaTest"
+BUNDLE_ID="org.ggml.iOSLlamaTest"
+XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
+TEMP_DIR="${BUILD_DIR}/temp"
+ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
+IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
+VALIDATION_DIR="${BUILD_DIR}/validation"
+
+# Create necessary directories
+mkdir -p "${BUILD_DIR}"
+mkdir -p "${TEMP_DIR}"
+mkdir -p "${VALIDATION_DIR}"
+
+echo "===== iOS Validation Process Started ====="
+
+# 1. Create a simple test app project
+echo "Creating test iOS app project..."
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundleIdentifier</key>
+ <string>${BUNDLE_ID}</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundlePackageType</key>
+ <string>APPL</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+ <key>LSRequiresIPhoneOS</key>
+ <true/>
+ <key>UILaunchScreen</key>
+ <dict/>
+ <key>UIRequiredDeviceCapabilities</key>
+ <array>
+ <string>armv7</string>
+ </array>
+ <key>UISupportedInterfaceOrientations</key>
+ <array>
+ <string>UIInterfaceOrientationPortrait</string>
+ </array>
+</dict>
+</plist>
+EOF
+
+# Create SwiftUI app files
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
+
+# Create App.swift
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
+import SwiftUI
+import llama
+
+@main
+struct LlamaTestApp: App {
+ var body: some Scene {
+ WindowGroup {
+ ContentView()
+ }
+ }
+}
+EOF
+
+# Create ContentView.swift
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
+import SwiftUI
+import llama
+
+struct ContentView: View {
+ // Test that we can initialize a llama context params struct
+ let params = llama_context_default_params()
+
+ var body: some View {
+ VStack(spacing: 20) {
+ Text("Llama Framework Test")
+ .font(.largeTitle)
+ .padding()
+
+ Text("llama_context_default_params() created successfully")
+ .font(.headline)
+ .multilineTextAlignment(.center)
+ .padding()
+
+ // Display some param values to confirm the framework is working
+ Text("n_ctx: \(params.n_ctx)")
+ .font(.body)
+
+ Text("n_batch: \(params.n_batch)")
+ .font(.body)
+
+ Spacer()
+ }
+ .padding()
+ }
+}
+
+struct ContentView_Previews: PreviewProvider {
+ static var previews: some View {
+ ContentView()
+ }
+}
+EOF
+
+# Create project.pbxproj, fixing the framework search paths issues
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 54;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
+ 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
+ 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 88888888888888888888888 /* Embed Frameworks */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = "";
+ dstSubfolderSpec = 10;
+ files = (
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
+ );
+ name = "Embed Frameworks";
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+ 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
+ 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+ 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+EOF
+
+# Add the rest of the project file with fixed framework search paths
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXFrameworksBuildPhase section */
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 55555555555555555555555 /* llama.xcframework in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 99999999999999999999999 /* ${APP_NAME}.app */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+EOF
+
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
+ isa = PBXGroup;
+ children = (
+ 66666666666666666666666 /* llama.xcframework */,
+ );
+ name = Frameworks;
+ sourceTree = "<group>";
+ };
+ EEEEEEEEEEEEEEEEEEEEEEEE = {
+ isa = PBXGroup;
+ children = (
+ FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */,
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
+ );
+ sourceTree = "<group>";
+ };
+ FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */ = {
+ isa = PBXGroup;
+ children = (
+ 1111111111111111111111AA /* Sources */,
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
+ );
+ path = "iOSLlamaTest";
+ sourceTree = "<group>";
+ };
+ 1111111111111111111111AA /* Sources */ = {
+ isa = PBXGroup;
+ children = (
+ 22222222222222222222222 /* App.swift */,
+ 44444444444444444444444 /* ContentView.swift */,
+ );
+ path = Sources;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin PBXNativeTarget section */
+ 3333333333333333333333AA /* ${APP_NAME} */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
+ buildPhases = (
+ 5555555555555555555555AA /* Sources */,
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
+ 6666666666666666666666AA /* Resources */,
+ 88888888888888888888888 /* Embed Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "${APP_NAME}";
+ productName = "${APP_NAME}";
+ productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
+ productType = "com.apple.product-type.application";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 7777777777777777777777AA /* Project object */ = {
+ isa = PBXProject;
+ attributes = {
+ LastSwiftUpdateCheck = 1240;
+ LastUpgradeCheck = 1240;
+ TargetAttributes = {
+ 3333333333333333333333AA = {
+ CreatedOnToolsVersion = 12.4;
+ };
+ };
+ };
+ buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
+ compatibilityVersion = "Xcode 12.0";
+ developmentRegion = en;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ Base,
+ );
+ mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
+ productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 3333333333333333333333AA /* ${APP_NAME} */,
+ );
+ };
+/* End PBXProject section */
+EOF
+
+# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXResourcesBuildPhase section */
+ 6666666666666666666666AA /* Resources */ = {
+ isa = PBXResourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 5555555555555555555555AA /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 33333333333333333333333 /* ContentView.swift in Sources */,
+ 11111111111111111111111 /* App.swift in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 9999999999999999999999AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = dwarf;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ ENABLE_TESTABILITY = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ IPHONEOS_DEPLOYMENT_TARGET = 16.4;
+ MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+ MTL_FAST_MATH = YES;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = iphoneos;
+ SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+ SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+ };
+ name = Debug;
+ };
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ ENABLE_NS_ASSERTIONS = NO;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ IPHONEOS_DEPLOYMENT_TARGET = 16.4;
+ MTL_ENABLE_DEBUG_INFO = NO;
+ MTL_FAST_MATH = YES;
+ SDKROOT = iphoneos;
+ SWIFT_COMPILATION_MODE = wholemodule;
+ SWIFT_OPTIMIZATION_LEVEL = "-O";
+ VALIDATE_PRODUCT = YES;
+ };
+ name = Release;
+ };
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
+ INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = "1,2";
+ };
+ name = Debug;
+ };
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = (
+ "$(inherited)",
+ "$(PROJECT_DIR)",
+ );
+ INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = "1,2";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+EOF
+
+# Finish the project.pbxproj file
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin XCConfigurationList section */
+ 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 9999999999999999999999AA /* Debug */,
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 7777777777777777777777AA /* Project object */;
+}
+EOF
+
+# 2. Copy XCFramework to test project
+echo "Copying XCFramework to test project..."
+cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
+
+# 3. Build and archive the app
+echo "Building and archiving test app..."
+cd "${TEMP_DIR}/${APP_NAME}"
+
+# Create a simple xcscheme file to avoid xcodebuild scheme issues
+mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
+cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ LastUpgradeVersion = "1240"
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ shouldUseLaunchSchemeArgsEnv = "YES">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ ignoresPersistentStateOnLaunch = "NO"
+ debugDocumentVersioning = "YES"
+ debugServiceExtension = "internal"
+ allowLocationSimulation = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </LaunchAction>
+ <ProfileAction
+ buildConfiguration = "Release"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ debugDocumentVersioning = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
+EOF
+
+# Now use xcodebuild with an explicitly defined product name
+xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk iphoneos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
+
+# 4. Create IPA from archive
+echo "Creating IPA from archive..."
+mkdir -p "${TEMP_DIR}/Payload"
+cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
+
+# Check and log app structure before zipping
+echo "App structure:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
+echo "Frameworks:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+
+cd "${TEMP_DIR}"
+zip -r "${IPA_PATH}" Payload
+
+# Check embedded provisioning profile
+echo "Checking provisioning profile (if any)..."
+PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
+if [ -n "$PROVISIONING_PROFILE" ]; then
+ echo "Found embedded provisioning profile:"
+ security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
+else
+ echo "No embedded provisioning profile found (expected for ad-hoc builds)"
+fi
+
+# 5. Validate the IPA
+echo "Validating IPA..."
+VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
+
+# Check if authentication credentials are provided
+AUTH_ARGS=""
+if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
+ echo "Using Apple ID authentication for validation..."
+ AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
+else
+ echo "No authentication credentials provided. Will perform basic validation."
+ echo "To use your personal developer account, you can run the script with:"
+ echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-ios.sh"
+ echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
+fi
+
+# Run validation with detailed output
+echo "Running validation with altool..."
+if [ -n "$AUTH_ARGS" ]; then
+ # Use eval to properly handle the quoted arguments
+ eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type ios --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
+else
+ xcrun altool --validate-app -f "${IPA_PATH}" --type ios --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
+fi
+VALIDATION_RESULT=$?
+
+# Final validation result
+FINAL_VALIDATION_RESULT=0
+
+# Check if validation failed because the app isn't in App Store Connect
+if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
+ echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
+ echo "This is expected for apps that haven't been registered in App Store Connect yet."
+ echo "This doesn't indicate a problem with the build or framework."
+
+ # Perform alternative validation
+ echo "Performing alternative validation checks..."
+
+ # Check if IPA was created successfully
+ if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
+ echo "✅ IPA file created successfully"
+ else
+ echo "❌ IPA file not created or empty"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if app binary exists and is executable
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
+ echo "✅ App binary exists and is executable"
+ else
+ echo "❌ App binary missing or not executable"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework was properly embedded
+ if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
+ echo "✅ llama.framework properly embedded"
+ else
+ echo "❌ llama.framework not properly embedded"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework binary exists
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
+ echo "✅ Framework binary exists"
+
+ # Further validate framework by checking architecture
+ ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|armv7\\|x86_64" | tr '\n' ' ')
+ if [ -n "$ARCHS" ]; then
+ echo "✅ Framework architecture(s): $ARCHS"
+ else
+ echo "⚠️ Could not determine framework architecture"
+ fi
+ else
+ echo "❌ Framework binary missing"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
+ else
+ echo "❌ Alternative validation FAILED: Issues found with the app or framework"
+ fi
+elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
+ echo "✅ iOS Validation PASSED: IPA successfully validated"
+ echo "Results saved to ${VALIDATION_OUTPUT}"
+else
+ echo "❌ iOS Validation FAILED: IPA validation found issues"
+ echo "See validation output at ${VALIDATION_OUTPUT}"
+ echo ""
+ echo "==== VALIDATION ERRORS ===="
+
+ # Try to extract specific errors from the output
+ if grep -q "Error" "${VALIDATION_OUTPUT}"; then
+ grep -A 5 "Error" "${VALIDATION_OUTPUT}"
+ else
+ # If no specific error found, show the whole log
+ cat "${VALIDATION_OUTPUT}"
+ fi
+
+ # Additional debugging: check IPA contents
+ echo ""
+ echo "==== IPA CONTENTS ===="
+ mkdir -p "${TEMP_DIR}/ipa_contents"
+ unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
+
+ # Check for code signing issues
+ echo ""
+ echo "==== CODE SIGNING INFO ===="
+ codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
+
+ # Check embedded frameworks
+ echo ""
+ echo "==== FRAMEWORK INFO ===="
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+fi
+
+# Don't clean up on error to allow inspection
+if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
+ echo ""
+ echo "Temporary files kept for inspection at: ${TEMP_DIR}"
+ echo "===== iOS Validation Process Failed ====="
+ exit 1
+fi
+
+# Clean up temporary files but keep build artifacts
+if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "Cleaning up temporary files..."
+ #rm -rf "${TEMP_DIR}"
+fi
+
+echo "===== iOS Validation Process Completed ====="
+exit $FINAL_VALIDATION_RESULT
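
The script above assembles AUTH_ARGS as a quoted string and then runs altool through eval so the embedded quotes survive word splitting. An alternative sketch using a bash array passes the same altool flags without the eval; this is not what the script does, just an equivalent pattern:

    # sketch only: array-based argument handling, same flags as the script
    AUTH_ARGS=()
    if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
        AUTH_ARGS=(--username "$APPLE_ID" --password "$APPLE_PASSWORD")
    fi
    xcrun altool --validate-app -f "${IPA_PATH}" --type ios \
        --output-format xml "${AUTH_ARGS[@]}" 2>&1 | tee "${VALIDATION_OUTPUT}"

An array keeps each argument intact without a second round of shell parsing, which is what the eval in the script works around; an empty array simply expands to nothing here since the script does not enable set -u.
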
diff --git a/llama.cpp/scripts/apple/validate-macos.sh b/llama.cpp/scripts/apple/validate-macos.sh
new file mode 100755
index 0000000..fa800ee
--- /dev/null
+++ b/llama.cpp/scripts/apple/validate-macos.sh
@@ -0,0 +1,781 @@
+#!/usr/bin/env bash
+# validate-macos.sh - Validate macOS Application with embedded llama.xcframework using SwiftUI
+
+# Authentication options (optional; can be set via environment variables)
+# To use: export APPLE_ID=your.email@example.com
+# export APPLE_PASSWORD=your-app-specific-password
+# ./validate-macos.sh
+APPLE_ID=${APPLE_ID:-""}
+APPLE_PASSWORD=${APPLE_PASSWORD:-""}
+
+# Ensure the script exits on error
+set -e
+
+# Function to print usage instructions
+print_usage() {
+ echo "Usage: ./validate-macos.sh [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --help Show this help message"
+ echo " --apple-id EMAIL Apple ID email for validation"
+ echo " --apple-password PWD App-specific password for Apple ID"
+ echo ""
+ echo "Environment variables:"
+ echo " APPLE_ID Apple ID email for validation"
+ echo " APPLE_PASSWORD App-specific password for Apple ID"
+ echo ""
+ echo "Notes:"
+ echo " - Command line options take precedence over environment variables"
+ echo " - Authentication is optional. If not provided, alternative validation will be performed"
+ echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --help)
+ print_usage
+ exit 0
+ ;;
+ --apple-id)
+ APPLE_ID="$2"
+ shift 2
+ ;;
+ --apple-password)
+ APPLE_PASSWORD="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option: $1"
+ print_usage
+ exit 1
+ ;;
+ esac
+done
+
+# Function to clean up in case of error
+cleanup() {
+ # Don't clean up temp files on error to help with debugging
+ echo "===== macOS Validation Process Failed ====="
+ exit 1
+}
+
+# Set up trap to call cleanup function on error
+trap cleanup ERR
+
+set -e # Exit on any error
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/validation-builds/macos"
+
+# Configuration
+APP_NAME="MacOSLlamaTest"
+BUNDLE_ID="org.ggml.MacOSLlamaTest"
+XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
+TEMP_DIR="${BUILD_DIR}/temp"
+ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
+APP_PATH="${BUILD_DIR}/${APP_NAME}.app"
+ZIP_PATH="${BUILD_DIR}/${APP_NAME}.zip"
+VALIDATION_DIR="${BUILD_DIR}/validation"
+
+# Create necessary directories
+mkdir -p "${BUILD_DIR}"
+mkdir -p "${TEMP_DIR}"
+mkdir -p "${VALIDATION_DIR}"
+
+echo "===== macOS Validation Process Started ====="
+
+# 1. Create a simple test app project
+echo "Creating test macOS app project..."
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundleIdentifier</key>
+ <string>${BUNDLE_ID}</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundlePackageType</key>
+ <string>APPL</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+ <key>LSMinimumSystemVersion</key>
+ <string>12.0</string>
+ <key>NSHumanReadableCopyright</key>
+ <string>Copyright © 2025 GGML. All rights reserved.</string>
+ <key>NSPrincipalClass</key>
+ <string>NSApplication</string>
+</dict>
+</plist>
+EOF
+
+# Create SwiftUI app files
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
+
+# Create App.swift
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
+import SwiftUI
+import llama
+
+@main
+struct LlamaTestApp: App {
+ var body: some Scene {
+ WindowGroup {
+ ContentView()
+ }
+ }
+}
+EOF
+
+# Create ContentView.swift with macOS specific elements
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
+import SwiftUI
+import llama
+
+struct ContentView: View {
+ // Test that we can initialize a llama context params struct
+ let params = llama_context_default_params()
+
+ var body: some View {
+ VStack(spacing: 20) {
+ Text("Llama Framework Test on macOS")
+ .font(.largeTitle)
+ .padding()
+
+ Text("llama_context_default_params() created successfully")
+ .font(.headline)
+ .multilineTextAlignment(.center)
+ .padding()
+
+ // Display some param values to confirm the framework is working
+ Text("n_ctx: \(params.n_ctx)")
+ .font(.body)
+
+ Text("n_batch: \(params.n_batch)")
+ .font(.body)
+
+ Spacer()
+ }
+ .padding()
+ .frame(width: 600, height: 400)
+ }
+}
+
+struct ContentView_Previews: PreviewProvider {
+ static var previews: some View {
+ ContentView()
+ }
+}
+EOF
+
+# Create project.pbxproj, fixing the framework search paths issues
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 54;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
+ 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
+ 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 88888888888888888888888 /* Embed Frameworks */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = "";
+ dstSubfolderSpec = 10;
+ files = (
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
+ );
+ name = "Embed Frameworks";
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+ 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
+ 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+ 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+EOF
+
+# Add the rest of the project file with fixed framework search paths
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXFrameworksBuildPhase section */
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 55555555555555555555555 /* llama.xcframework in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 99999999999999999999999 /* ${APP_NAME}.app */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+EOF
+
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
+ isa = PBXGroup;
+ children = (
+ 66666666666666666666666 /* llama.xcframework */,
+ );
+ name = Frameworks;
+ sourceTree = "<group>";
+ };
+ EEEEEEEEEEEEEEEEEEEEEEEE = {
+ isa = PBXGroup;
+ children = (
+ FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */,
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
+ );
+ sourceTree = "<group>";
+ };
+ FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */ = {
+ isa = PBXGroup;
+ children = (
+ 1111111111111111111111AA /* Sources */,
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
+ );
+ path = "MacOSLlamaTest";
+ sourceTree = "<group>";
+ };
+ 1111111111111111111111AA /* Sources */ = {
+ isa = PBXGroup;
+ children = (
+ 22222222222222222222222 /* App.swift */,
+ 44444444444444444444444 /* ContentView.swift */,
+ );
+ path = Sources;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin PBXNativeTarget section */
+ 3333333333333333333333AA /* ${APP_NAME} */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
+ buildPhases = (
+ 5555555555555555555555AA /* Sources */,
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
+ 6666666666666666666666AA /* Resources */,
+ 88888888888888888888888 /* Embed Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "${APP_NAME}";
+ productName = "${APP_NAME}";
+ productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
+ productType = "com.apple.product-type.application";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 7777777777777777777777AA /* Project object */ = {
+ isa = PBXProject;
+ attributes = {
+ LastSwiftUpdateCheck = 1240;
+ LastUpgradeCheck = 1240;
+ TargetAttributes = {
+ 3333333333333333333333AA = {
+ CreatedOnToolsVersion = 12.4;
+ };
+ };
+ };
+ buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
+ compatibilityVersion = "Xcode 12.0";
+ developmentRegion = en;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ Base,
+ );
+ mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
+ productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 3333333333333333333333AA /* ${APP_NAME} */,
+ );
+ };
+/* End PBXProject section */
+EOF
+
+# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and macOS settings
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXResourcesBuildPhase section */
+ 6666666666666666666666AA /* Resources */ = {
+ isa = PBXResourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 5555555555555555555555AA /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 33333333333333333333333 /* ContentView.swift in Sources */,
+ 11111111111111111111111 /* App.swift in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 9999999999999999999999AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = dwarf;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ ENABLE_TESTABILITY = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 12.0;
+ MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+ MTL_FAST_MATH = YES;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = macosx;
+ SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+ SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+ };
+ name = Debug;
+ };
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ ENABLE_NS_ASSERTIONS = NO;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 12.0;
+ MTL_ENABLE_DEBUG_INFO = NO;
+ MTL_FAST_MATH = YES;
+ SDKROOT = macosx;
+ SWIFT_COMPILATION_MODE = wholemodule;
+ SWIFT_OPTIMIZATION_LEVEL = "-O";
+ };
+ name = Release;
+ };
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ COMBINE_HIDPI_IMAGES = YES;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_HARDENED_RUNTIME = YES;
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
+ INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/../Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ };
+ name = Debug;
+ };
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ COMBINE_HIDPI_IMAGES = YES;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_HARDENED_RUNTIME = YES;
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = (
+ "$(inherited)",
+ "$(PROJECT_DIR)",
+ );
+ INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/../Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+EOF
+
+# Finish the project.pbxproj file
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin XCConfigurationList section */
+ 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 9999999999999999999999AA /* Debug */,
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 7777777777777777777777AA /* Project object */;
+}
+EOF
+
+# 2. Copy XCFramework to test project
+echo "Copying XCFramework to test project..."
+cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
+
+# 3. Build and archive the app
+echo "Building and archiving test app..."
+cd "${TEMP_DIR}/${APP_NAME}"
+
+# Create a simple xcscheme file to avoid xcodebuild scheme issues
+mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
+cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ LastUpgradeVersion = "1240"
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ shouldUseLaunchSchemeArgsEnv = "YES">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ ignoresPersistentStateOnLaunch = "NO"
+ debugDocumentVersioning = "YES"
+ debugServiceExtension = "internal"
+ allowLocationSimulation = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </LaunchAction>
+ <ProfileAction
+ buildConfiguration = "Release"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ debugDocumentVersioning = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
+EOF
+
+# Now use xcodebuild with an explicitly defined product name for macOS
+xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk macosx -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
+
+# 4. Create a package for distribution
+echo "Creating distributable package from archive..."
+cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${APP_PATH}"
+
+# Check and log app structure
+echo "App structure:"
+ls -la "${APP_PATH}"
+echo "Frameworks:"
+ls -la "${APP_PATH}/Contents/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+
+# Create a zip file for potential distribution
+cd "${BUILD_DIR}"
+zip -r "${ZIP_PATH}" "${APP_NAME}.app"
+
+# Check embedded provisioning profile
+echo "Checking provisioning profile (if any)..."
+PROVISIONING_PROFILE=$(find "${APP_PATH}/Contents" -name "embedded.provisionprofile" 2>/dev/null)
+if [ -n "$PROVISIONING_PROFILE" ]; then
+ echo "Found embedded provisioning profile:"
+ security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
+else
+ echo "No embedded provisioning profile found (expected for ad-hoc builds)"
+fi
+
+# 5. Validate the app
+echo "Validating macOS app..."
+VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
+
+# Check if authentication credentials are provided
+AUTH_ARGS=""
+if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
+ echo "Using Apple ID authentication for validation..."
+ AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
+else
+ echo "No authentication credentials provided. Will perform basic validation."
+ echo "To use your personal developer account, you can run the script with:"
+ echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-macos.sh"
+ echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
+fi
+
+# For macOS we need to use notarytool or alternative checks because altool doesn't support macOS apps in the same way
+echo "Note: For macOS, the formal notarization process requires Apple Developer credentials."
+echo "Performing alternative validation checks..."
+
+# Final validation result
+FINAL_VALIDATION_RESULT=0
+
+# Check if app was created successfully
+if [ -d "${APP_PATH}" ] && [ -s "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then
+ echo "✅ App package created successfully"
+else
+ echo "❌ App package not created or binary missing"
+ FINAL_VALIDATION_RESULT=1
+fi
+
+# Check if app binary exists and is executable
+if [ -f "${APP_PATH}/Contents/MacOS/${APP_NAME}" ] && [ -x "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then
+ echo "✅ App binary exists and is executable"
+else
+ echo "❌ App binary missing or not executable"
+ FINAL_VALIDATION_RESULT=1
+fi
+
+# Check if framework was properly embedded
+if [ -d "${APP_PATH}/Contents/Frameworks/llama.framework" ]; then
+ echo "✅ llama.framework properly embedded"
+else
+ echo "❌ llama.framework not properly embedded"
+ FINAL_VALIDATION_RESULT=1
+fi
+
+# Check if framework binary exists
+if [ -f "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" ]; then
+ echo "✅ Framework binary exists"
+
+ # Further validate framework by checking architecture
+ ARCHS=$(lipo -info "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
+ if [ -n "$ARCHS" ]; then
+ echo "✅ Framework architecture(s): $ARCHS"
+ else
+ echo "⚠️ Could not determine framework architecture"
+ fi
+else
+ echo "❌ Framework binary missing"
+ FINAL_VALIDATION_RESULT=1
+fi
+
+# Check code signing
+echo ""
+echo "==== CODE SIGNING INFO ===="
+codesign -vv -d "${APP_PATH}" 2>&1 || echo "Code signing verification not available (expected for ad-hoc builds)"
+
+if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ if [ -n "$AUTH_ARGS" ]; then
+ echo ""
+ echo "To notarize this app with Apple (requires Apple Developer account):"
+ echo "xcrun notarytool submit \"${ZIP_PATH}\" --apple-id \"your-apple-id\" --password \"your-app-specific-password\" --team-id \"your-team-id\" --wait"
+ echo ""
+ fi
+ echo "✅ Validation PASSED: macOS app built successfully with embedded framework"
+else
+ echo "❌ Validation FAILED: Issues found with the app or framework"
+fi
+
+# Don't clean up on error to allow inspection
+if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
+ echo ""
+ echo "Temporary files kept for inspection at: ${TEMP_DIR}"
+ echo "===== macOS Validation Process Failed ====="
+ exit 1
+fi
+
+# Clean up temporary files but keep build artifacts
+if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "Cleaning up temporary files..."
+ #rm -rf "${TEMP_DIR}"
+fi
+
+echo "===== macOS Validation Process Completed ====="
+echo "App package available at: ${APP_PATH}"
+echo "Zipped app available at: ${ZIP_PATH}"
+exit $FINAL_VALIDATION_RESULT
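
The macOS script only prints the notarization command; if Developer credentials are available, the full flow would look roughly like the sketch below. The team ID and credentials are placeholders, the ZIP path assumes the BUILD_DIR configured above, and the stapling step is an addition beyond what the script prints:

    # sketch only: placeholder credentials and paths
    ZIP_PATH="validation-builds/macos/MacOSLlamaTest.zip"    # as produced by the script above
    xcrun notarytool submit "${ZIP_PATH}" \
        --apple-id "your.email@example.com" \
        --password "your-app-specific-password" \
        --team-id "YOURTEAMID" \
        --wait
    xcrun stapler staple "validation-builds/macos/MacOSLlamaTest.app"   # attach the ticket once approved
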
diff --git a/llama.cpp/scripts/apple/validate-tvos.sh b/llama.cpp/scripts/apple/validate-tvos.sh
new file mode 100755
index 0000000..b4da698
--- /dev/null
+++ b/llama.cpp/scripts/apple/validate-tvos.sh
@@ -0,0 +1,813 @@
+#!/usr/bin/env bash
+# validate-tvos.sh - Validate tvOS Application with embedded llama.xcframework using SwiftUI
+
+# Authentication options (optional) (can be set via environment variables)
+# To use: export APPLE_ID=your.email@example.com
+# export APPLE_PASSWORD=your-app-specific-password
+# ./validate-tvos.sh
+APPLE_ID=${APPLE_ID:-""}
+APPLE_PASSWORD=${APPLE_PASSWORD:-""}
+
+# Ensure the script exits on error
+set -e
+
+# Function to print usage instructions
+print_usage() {
+ echo "Usage: ./validate-tvos.sh [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --help Show this help message"
+ echo " --apple-id EMAIL Apple ID email for validation"
+ echo " --apple-password PWD App-specific password for Apple ID"
+ echo ""
+ echo "Environment variables:"
+ echo " APPLE_ID Apple ID email for validation"
+ echo " APPLE_PASSWORD App-specific password for Apple ID"
+ echo ""
+ echo "Notes:"
+ echo " - Command line options take precedence over environment variables"
+ echo " - Authentication is optional. If not provided, alternative validation will be performed"
+ echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --help)
+ print_usage
+ exit 0
+ ;;
+ --apple-id)
+ APPLE_ID="$2"
+ shift 2
+ ;;
+ --apple-password)
+ APPLE_PASSWORD="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option: $1"
+ print_usage
+ exit 1
+ ;;
+ esac
+done
+
+# Function to clean up in case of error
+cleanup() {
+ # Don't clean up temp files on error to help with debugging
+ echo "===== tvOS Validation Process Failed ====="
+ exit 1
+}
+
+# Set up trap to call cleanup function on error
+trap cleanup ERR
+
+set -e # Exit on any error
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/validation-builds/ios"
+
+# Configuration
+APP_NAME="TVOSLlamaTest"
+BUNDLE_ID="org.ggml.TVOSLlamaTest"
+XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
+TEMP_DIR="${BUILD_DIR}/temp"
+ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
+IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
+VALIDATION_DIR="${BUILD_DIR}/validation"
+
+# Create necessary directories
+mkdir -p "${BUILD_DIR}"
+mkdir -p "${TEMP_DIR}"
+mkdir -p "${VALIDATION_DIR}"
+
+echo "===== tvOS Validation Process Started ====="
+
+# 1. Create a simple test app project
+echo "Creating test tvOS app project..."
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundleIdentifier</key>
+ <string>${BUNDLE_ID}</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundlePackageType</key>
+ <string>APPL</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+ <key>UIRequiredDeviceCapabilities</key>
+ <array>
+ <string>arm64</string>
+ </array>
+</dict>
+</plist>
+EOF
+
+# Create SwiftUI app files
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
+
+# Create App.swift
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
+import SwiftUI
+import llama
+
+@main
+struct LlamaTestApp: App {
+ var body: some Scene {
+ WindowGroup {
+ ContentView()
+ }
+ }
+}
+EOF
+
+# Create ContentView.swift with tvOS specific elements
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
+import SwiftUI
+import llama
+
+struct ContentView: View {
+ // Test that we can initialize a llama context params struct
+ let params = llama_context_default_params()
+
+ var body: some View {
+ VStack(spacing: 40) {
+ Text("Llama Framework Test on tvOS")
+ .font(.largeTitle)
+ .padding()
+
+ Text("llama_context_default_params() created successfully")
+ .font(.headline)
+ .multilineTextAlignment(.center)
+ .padding()
+
+ // Display some param values to confirm the framework is working
+ Text("n_ctx: \(params.n_ctx)")
+ .font(.title2)
+
+ Text("n_batch: \(params.n_batch)")
+ .font(.title2)
+
+ Spacer()
+ }
+ .padding(50)
+ // Larger size suitable for TV display
+ }
+}
+
+struct ContentView_Previews: PreviewProvider {
+ static var previews: some View {
+ ContentView()
+ }
+}
+EOF
+
+# Create project.pbxproj, fixing the framework search paths issues
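+# (The file is emitted in chunks: quoted heredocs ('EOF') keep the boilerplate literal, while
+# unquoted heredocs (EOF) let ${APP_NAME} and the bundle identifier expand into the project.)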
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 54;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
+ 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
+ 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 88888888888888888888888 /* Embed Frameworks */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = "";
+ dstSubfolderSpec = 10;
+ files = (
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
+ );
+ name = "Embed Frameworks";
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+ 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
+ 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+ 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+EOF
+
+# Add the rest of the project file with fixed framework search paths
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXFrameworksBuildPhase section */
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 55555555555555555555555 /* llama.xcframework in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 99999999999999999999999 /* ${APP_NAME}.app */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+EOF
+
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
+ isa = PBXGroup;
+ children = (
+ 66666666666666666666666 /* llama.xcframework */,
+ );
+ name = Frameworks;
+ sourceTree = "<group>";
+ };
+ EEEEEEEEEEEEEEEEEEEEEEEE = {
+ isa = PBXGroup;
+ children = (
+ FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */,
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
+ );
+ sourceTree = "<group>";
+ };
+ FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */ = {
+ isa = PBXGroup;
+ children = (
+ 1111111111111111111111AA /* Sources */,
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
+ );
+ path = "TVOSLlamaTest";
+ sourceTree = "<group>";
+ };
+ 1111111111111111111111AA /* Sources */ = {
+ isa = PBXGroup;
+ children = (
+ 22222222222222222222222 /* App.swift */,
+ 44444444444444444444444 /* ContentView.swift */,
+ );
+ path = Sources;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin PBXNativeTarget section */
+ 3333333333333333333333AA /* ${APP_NAME} */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
+ buildPhases = (
+ 5555555555555555555555AA /* Sources */,
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
+ 6666666666666666666666AA /* Resources */,
+ 88888888888888888888888 /* Embed Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "${APP_NAME}";
+ productName = "${APP_NAME}";
+ productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
+ productType = "com.apple.product-type.application";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 7777777777777777777777AA /* Project object */ = {
+ isa = PBXProject;
+ attributes = {
+ LastSwiftUpdateCheck = 1240;
+ LastUpgradeCheck = 1240;
+ TargetAttributes = {
+ 3333333333333333333333AA = {
+ CreatedOnToolsVersion = 12.4;
+ };
+ };
+ };
+ buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
+ compatibilityVersion = "Xcode 12.0";
+ developmentRegion = en;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ Base,
+ );
+ mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
+ productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 3333333333333333333333AA /* ${APP_NAME} */,
+ );
+ };
+/* End PBXProject section */
+EOF
+
+# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and tvOS settings
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXResourcesBuildPhase section */
+ 6666666666666666666666AA /* Resources */ = {
+ isa = PBXResourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 5555555555555555555555AA /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 33333333333333333333333 /* ContentView.swift in Sources */,
+ 11111111111111111111111 /* App.swift in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 9999999999999999999999AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = dwarf;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ ENABLE_TESTABILITY = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ TVOS_DEPLOYMENT_TARGET = 15.0;
+ MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+ MTL_FAST_MATH = YES;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = appletvos;
+ SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+ SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+ };
+ name = Debug;
+ };
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ ENABLE_NS_ASSERTIONS = NO;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ TVOS_DEPLOYMENT_TARGET = 15.0;
+ MTL_ENABLE_DEBUG_INFO = NO;
+ MTL_FAST_MATH = YES;
+ SDKROOT = appletvos;
+ SWIFT_COMPILATION_MODE = wholemodule;
+ SWIFT_OPTIMIZATION_LEVEL = "-O";
+ VALIDATE_PRODUCT = YES;
+ };
+ name = Release;
+ };
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
+ INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = 3;
+ };
+ name = Debug;
+ };
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = (
+ "$(inherited)",
+ "$(PROJECT_DIR)",
+ );
+ INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = 3;
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+EOF
+
+# Finish the project.pbxproj file
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin XCConfigurationList section */
+ 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 9999999999999999999999AA /* Debug */,
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 7777777777777777777777AA /* Project object */;
+}
+EOF
+
+# 2. Copy XCFramework to test project
+echo "Copying XCFramework to test project..."
+cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
+
+# 3. Build and archive the app
+echo "Building and archiving test app..."
+cd "${TEMP_DIR}/${APP_NAME}"
+
+# Create a simple xcscheme file to avoid xcodebuild scheme issues
+mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
+cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ LastUpgradeVersion = "1240"
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ shouldUseLaunchSchemeArgsEnv = "YES">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ ignoresPersistentStateOnLaunch = "NO"
+ debugDocumentVersioning = "YES"
+ debugServiceExtension = "internal"
+ allowLocationSimulation = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </LaunchAction>
+ <ProfileAction
+ buildConfiguration = "Release"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ debugDocumentVersioning = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
+EOF
+
+# Now use xcodebuild with an explicitly defined product name for tvOS
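+# Code signing is disabled here (CODE_SIGN_IDENTITY="-", CODE_SIGNING_REQUIRED/ALLOWED=NO) so the
+# archive can be produced without a provisioning profile or Apple Developer account.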
+xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk appletvos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
+
+# 4. Create IPA from archive
+echo "Creating IPA from archive..."
+mkdir -p "${TEMP_DIR}/Payload"
+cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
+
+# Check and log app structure before zipping
+echo "App structure:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
+echo "Frameworks:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+
+cd "${TEMP_DIR}"
+zip -r "${IPA_PATH}" Payload
+
+# Check embedded provisioning profile
+echo "Checking provisioning profile (if any)..."
+PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
+if [ -n "$PROVISIONING_PROFILE" ]; then
+ echo "Found embedded provisioning profile:"
+ security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
+else
+ echo "No embedded provisioning profile found (expected for ad-hoc builds)"
+fi
+
+# 5. Validate the IPA
+echo "Validating IPA..."
+VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
+
+# Check if authentication credentials are provided
+AUTH_ARGS=""
+if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
+ echo "Using Apple ID authentication for validation..."
+ AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
+else
+ echo "No authentication credentials provided. Will perform basic validation."
+ echo "To use your personal developer account, you can run the script with:"
+ echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-tvos.sh"
+ echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
+fi
+
+# Run validation with detailed output
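+# A full altool validation needs Apple ID credentials and an existing App Store Connect record;
+# the branches below treat their absence as non-fatal.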
+echo "Running validation with altool..."
+if [ -n "$AUTH_ARGS" ]; then
+ # Use eval to properly handle the quoted arguments
+ eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type tvos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
+else
+ xcrun altool --validate-app -f "${IPA_PATH}" --type tvos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
+fi
+VALIDATION_RESULT=$?
+
+# Final validation result
+FINAL_VALIDATION_RESULT=0
+
+# Check if validation failed because the app isn't in App Store Connect
+if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
+ echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
+ echo "This is expected for apps that haven't been registered in App Store Connect yet."
+ echo "This doesn't indicate a problem with the build or framework."
+
+ # Perform alternative validation
+ echo "Performing alternative validation checks..."
+
+ # Check if IPA was created successfully
+ if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
+ echo "✅ IPA file created successfully"
+ else
+ echo "❌ IPA file not created or empty"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if app binary exists and is executable
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
+ echo "✅ App binary exists and is executable"
+ else
+ echo "❌ App binary missing or not executable"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework was properly embedded
+ if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
+ echo "✅ llama.framework properly embedded"
+ else
+ echo "❌ llama.framework not properly embedded"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework binary exists
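+ # (On tvOS the framework is a shallow bundle, so the binary sits at the framework root rather
+ # than under Versions/A as it does on macOS.)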
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
+ echo "✅ Framework binary exists"
+
+ # Further validate framework by checking architecture
+ ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
+ if [ -n "$ARCHS" ]; then
+ echo "✅ Framework architecture(s): $ARCHS"
+ else
+ echo "⚠️ Could not determine framework architecture"
+ fi
+ else
+ echo "❌ Framework binary missing"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
+ else
+ echo "❌ Alternative validation FAILED: Issues found with the app or framework"
+ fi
+elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
+ echo "✅ tvOS Validation PASSED: IPA successfully validated"
+ echo "Results saved to ${VALIDATION_OUTPUT}"
+else
+ echo "❌ tvOS Validation FAILED: IPA validation found issues"
+ echo "See validation output at ${VALIDATION_OUTPUT}"
+ echo ""
+ echo "==== VALIDATION ERRORS ===="
+
+ # Try to extract specific errors from the output
+ if grep -q "Error" "${VALIDATION_OUTPUT}"; then
+ grep -A 5 "Error" "${VALIDATION_OUTPUT}"
+ else
+ # If no specific error found, show the whole log
+ cat "${VALIDATION_OUTPUT}"
+ fi
+
+ # Additional debugging: check IPA contents
+ echo ""
+ echo "==== IPA CONTENTS ===="
+ mkdir -p "${TEMP_DIR}/ipa_contents"
+ unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
+
+ # Check for code signing issues
+ echo ""
+ echo "==== CODE SIGNING INFO ===="
+ codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
+
+ # Check embedded frameworks
+ echo ""
+ echo "==== FRAMEWORK INFO ===="
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+fi
+
+# Don't clean up on error to allow inspection
+if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
+ echo ""
+ echo "Temporary files kept for inspection at: ${TEMP_DIR}"
+ echo "===== tvOS Validation Process Failed ====="
+ exit 1
+fi
+
+# Clean up temporary files but keep build artifacts
+if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "Cleaning up temporary files..."
+ #rm -rf "${TEMP_DIR}"
+fi
+
+echo "===== tvOS Validation Process Completed ====="
+exit $FINAL_VALIDATION_RESULT
diff --git a/llama.cpp/scripts/apple/validate-visionos.sh b/llama.cpp/scripts/apple/validate-visionos.sh
new file mode 100755
index 0000000..bbdec66
--- /dev/null
+++ b/llama.cpp/scripts/apple/validate-visionos.sh
@@ -0,0 +1,811 @@
+#!/usr/bin/env bash
+# validate-visionos.sh - Validate visionOS Application with embedded llama.xcframework using SwiftUI
+
+# Authentication options (optional) (can be set via environment variables)
+# To use: export APPLE_ID=your.email@example.com
+# export APPLE_PASSWORD=your-app-specific-password
+# ./validate-visionos.sh
+APPLE_ID=${APPLE_ID:-""}
+APPLE_PASSWORD=${APPLE_PASSWORD:-""}
+
+# Ensure the script exits on error
+set -e
+
+# Function to print usage instructions
+print_usage() {
+ echo "Usage: ./validate-visionos.sh [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --help Show this help message"
+ echo " --apple-id EMAIL Apple ID email for validation"
+ echo " --apple-password PWD App-specific password for Apple ID"
+ echo ""
+ echo "Environment variables:"
+ echo " APPLE_ID Apple ID email for validation"
+ echo " APPLE_PASSWORD App-specific password for Apple ID"
+ echo ""
+ echo "Notes:"
+ echo " - Command line options take precedence over environment variables"
+ echo " - Authentication is optional. If not provided, alternative validation will be performed"
+ echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --help)
+ print_usage
+ exit 0
+ ;;
+ --apple-id)
+ APPLE_ID="$2"
+ shift 2
+ ;;
+ --apple-password)
+ APPLE_PASSWORD="$2"
+ shift 2
+ ;;
+ *)
+ echo "Unknown option: $1"
+ print_usage
+ exit 1
+ ;;
+ esac
+done
+
+# Function to clean up in case of error
+cleanup() {
+ # Don't clean up temp files on error to help with debugging
+ echo "===== visionOS Validation Process Failed ====="
+ exit 1
+}
+
+# Set up trap to call cleanup function on error
+trap cleanup ERR
+
+set -e # Exit on any error
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/validation-builds/visionos"
+
+# Configuration
+APP_NAME="VisionOSLlamaTest"
+BUNDLE_ID="org.ggml.VisionOSLlamaTest"
+XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
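+# The xcframework is expected to already exist at build-apple/llama.xcframework (produced
+# beforehand by the repository's xcframework build step).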
+TEMP_DIR="${BUILD_DIR}/temp"
+ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
+IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
+VALIDATION_DIR="${BUILD_DIR}/validation"
+
+# Create necessary directories
+mkdir -p "${BUILD_DIR}"
+mkdir -p "${TEMP_DIR}"
+mkdir -p "${VALIDATION_DIR}"
+
+echo "===== visionOS Validation Process Started ====="
+
+# 1. Create a simple test app project
+echo "Creating test visionOS app project..."
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundleIdentifier</key>
+ <string>${BUNDLE_ID}</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>${APP_NAME}</string>
+ <key>CFBundlePackageType</key>
+ <string>APPL</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+</dict>
+</plist>
+EOF
+
+# Create SwiftUI app files
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
+
+# Create App.swift
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
+import SwiftUI
+import llama
+
+@main
+struct LlamaTestApp: App {
+ var body: some Scene {
+ WindowGroup {
+ ContentView()
+ }
+ }
+}
+EOF
+
+# Create ContentView.swift with visionOS specific elements
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
+import SwiftUI
+import llama
+
+struct ContentView: View {
+ // Test that we can initialize a llama context params struct
+ let params = llama_context_default_params()
+
+ var body: some View {
+ VStack(spacing: 20) {
+ Text("Llama Framework Test on visionOS")
+ .font(.largeTitle)
+ .padding()
+
+ Text("llama_context_default_params() created successfully")
+ .font(.headline)
+ .multilineTextAlignment(.center)
+ .padding()
+
+ // Display some param values to confirm the framework is working
+ Text("n_ctx: \(params.n_ctx)")
+ .font(.body)
+
+ Text("n_batch: \(params.n_batch)")
+ .font(.body)
+
+ Spacer()
+ }
+ .padding()
+ .frame(width: 500, height: 400)
+ }
+}
+
+struct ContentView_Previews: PreviewProvider {
+ static var previews: some View {
+ ContentView()
+ }
+}
+EOF
+
+# Create project.pbxproj, fixing the framework search paths issues
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
+cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 54;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
+ 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
+ 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 88888888888888888888888 /* Embed Frameworks */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = "";
+ dstSubfolderSpec = 10;
+ files = (
+ 77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
+ );
+ name = "Embed Frameworks";
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+ 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
+ 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+ 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+EOF
+
+# Add the rest of the project file with fixed framework search paths
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXFrameworksBuildPhase section */
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 55555555555555555555555 /* llama.xcframework in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 99999999999999999999999 /* ${APP_NAME}.app */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+EOF
+
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
+ isa = PBXGroup;
+ children = (
+ 66666666666666666666666 /* llama.xcframework */,
+ );
+ name = Frameworks;
+ sourceTree = "<group>";
+ };
+ EEEEEEEEEEEEEEEEEEEEEEEE = {
+ isa = PBXGroup;
+ children = (
+ FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */,
+ CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
+ DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
+ );
+ sourceTree = "<group>";
+ };
+ FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */ = {
+ isa = PBXGroup;
+ children = (
+ 1111111111111111111111AA /* Sources */,
+ AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
+ );
+ path = "VisionOSLlamaTest";
+ sourceTree = "<group>";
+ };
+ 1111111111111111111111AA /* Sources */ = {
+ isa = PBXGroup;
+ children = (
+ 22222222222222222222222 /* App.swift */,
+ 44444444444444444444444 /* ContentView.swift */,
+ );
+ path = Sources;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+EOF
+
+# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin PBXNativeTarget section */
+ 3333333333333333333333AA /* ${APP_NAME} */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
+ buildPhases = (
+ 5555555555555555555555AA /* Sources */,
+ BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
+ 6666666666666666666666AA /* Resources */,
+ 88888888888888888888888 /* Embed Frameworks */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "${APP_NAME}";
+ productName = "${APP_NAME}";
+ productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
+ productType = "com.apple.product-type.application";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 7777777777777777777777AA /* Project object */ = {
+ isa = PBXProject;
+ attributes = {
+ LastSwiftUpdateCheck = 1510;
+ LastUpgradeCheck = 1510;
+ TargetAttributes = {
+ 3333333333333333333333AA = {
+ CreatedOnToolsVersion = 15.1;
+ };
+ };
+ };
+ buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
+ compatibilityVersion = "Xcode 15.0";
+ developmentRegion = en;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ Base,
+ );
+ mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
+ productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 3333333333333333333333AA /* ${APP_NAME} */,
+ );
+ };
+/* End PBXProject section */
+EOF
+
+# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
+/* Begin PBXResourcesBuildPhase section */
+ 6666666666666666666666AA /* Resources */ = {
+ isa = PBXResourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 5555555555555555555555AA /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 33333333333333333333333 /* ContentView.swift in Sources */,
+ 11111111111111111111111 /* App.swift in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 9999999999999999999999AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = dwarf;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ ENABLE_TESTABILITY = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+ MTL_FAST_MATH = YES;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = xros;
+ SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+ SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+ XROS_DEPLOYMENT_TARGET = 1.0;
+ };
+ name = Debug;
+ };
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ CLANG_ANALYZER_NONNULL = YES;
+ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+ CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+ CLANG_CXX_LIBRARY = "libc++";
+ CLANG_ENABLE_MODULES = YES;
+ CLANG_ENABLE_OBJC_ARC = YES;
+ CLANG_ENABLE_OBJC_WEAK = YES;
+ CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+ CLANG_WARN_BOOL_CONVERSION = YES;
+ CLANG_WARN_COMMA = YES;
+ CLANG_WARN_CONSTANT_CONVERSION = YES;
+ CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+ CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+ CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+ CLANG_WARN_EMPTY_BODY = YES;
+ CLANG_WARN_ENUM_CONVERSION = YES;
+ CLANG_WARN_INFINITE_RECURSION = YES;
+ CLANG_WARN_INT_CONVERSION = YES;
+ CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+ CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+ CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+ CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+ CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+ CLANG_WARN_STRICT_PROTOTYPES = YES;
+ CLANG_WARN_SUSPICIOUS_MOVE = YES;
+ CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+ CLANG_WARN_UNREACHABLE_CODE = YES;
+ CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+ COPY_PHASE_STRIP = NO;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ ENABLE_NS_ASSERTIONS = NO;
+ ENABLE_STRICT_OBJC_MSGSEND = YES;
+ GCC_C_LANGUAGE_STANDARD = gnu11;
+ GCC_NO_COMMON_BLOCKS = YES;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+ GCC_WARN_UNDECLARED_SELECTOR = YES;
+ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+ GCC_WARN_UNUSED_FUNCTION = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MTL_ENABLE_DEBUG_INFO = NO;
+ MTL_FAST_MATH = YES;
+ SDKROOT = xros;
+ SWIFT_COMPILATION_MODE = wholemodule;
+ SWIFT_OPTIMIZATION_LEVEL = "-O";
+ VALIDATE_PRODUCT = YES;
+ XROS_DEPLOYMENT_TARGET = 1.0;
+ };
+ name = Release;
+ };
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
+ INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SUPPORTED_PLATFORMS = "xros xrsimulator";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = "1,2,7";
+ };
+ name = Debug;
+ };
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+ ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+ CODE_SIGN_STYLE = Manual;
+ DEVELOPMENT_TEAM = "";
+ ENABLE_PREVIEWS = YES;
+ FRAMEWORK_SEARCH_PATHS = (
+ "$(inherited)",
+ "$(PROJECT_DIR)",
+ );
+ INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
+ LD_RUNPATH_SEARCH_PATHS = (
+ "$(inherited)",
+ "@executable_path/Frameworks",
+ );
+ PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ PROVISIONING_PROFILE_SPECIFIER = "";
+ SUPPORTED_PLATFORMS = "xros xrsimulator";
+ SWIFT_VERSION = 5.0;
+ TARGETED_DEVICE_FAMILY = "1,2,7";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+EOF
+
+# Finish the project.pbxproj file
+cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
+/* Begin XCConfigurationList section */
+ 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 9999999999999999999999AA /* Debug */,
+ AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
+ CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 7777777777777777777777AA /* Project object */;
+}
+EOF
+
+# 2. Copy XCFramework to test project
+echo "Copying XCFramework to test project..."
+cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
+
+# 3. Build and archive the app
+echo "Building and archiving test app..."
+cd "${TEMP_DIR}/${APP_NAME}"
+
+# Create a simple xcscheme file to avoid xcodebuild scheme issues
+mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
+cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ LastUpgradeVersion = "1510"
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ shouldUseLaunchSchemeArgsEnv = "YES">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ buildConfiguration = "Debug"
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ ignoresPersistentStateOnLaunch = "NO"
+ debugDocumentVersioning = "YES"
+ debugServiceExtension = "internal"
+ allowLocationSimulation = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </LaunchAction>
+ <ProfileAction
+ buildConfiguration = "Release"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ debugDocumentVersioning = "YES">
+ <BuildableProductRunnable
+ runnableDebuggingMode = "0">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "3333333333333333333333AA"
+ BuildableName = "${APP_NAME}.app"
+ BlueprintName = "${APP_NAME}"
+ ReferencedContainer = "container:${APP_NAME}.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
+EOF
+
+# Now use xcodebuild with an explicitly defined product name for visionOS
+xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk xros -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
+
+# 4. Create IPA from archive
+echo "Creating IPA from archive..."
+mkdir -p "${TEMP_DIR}/Payload"
+cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
+
+# Check and log app structure before zipping
+echo "App structure:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
+echo "Frameworks:"
+ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+
+cd "${TEMP_DIR}"
+zip -r "${IPA_PATH}" Payload
+
+# Check embedded provisioning profile
+echo "Checking provisioning profile (if any)..."
+PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
+if [ -n "$PROVISIONING_PROFILE" ]; then
+ echo "Found embedded provisioning profile:"
+ security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
+else
+ echo "No embedded provisioning profile found (expected for ad-hoc builds)"
+fi
+
+# 5. Validate the IPA
+echo "Validating IPA..."
+VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
+
+# Check if authentication credentials are provided
+AUTH_ARGS=""
+if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
+ echo "Using Apple ID authentication for validation..."
+ AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
+else
+ echo "No authentication credentials provided. Will perform basic validation."
+ echo "To use your personal developer account, you can run the script with:"
+ echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-visionos.sh"
+ echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
+fi
+
+# Run validation with detailed output
+echo "Running validation with altool..."
+if [ -n "$AUTH_ARGS" ]; then
+ # Use eval to properly handle the quoted arguments
+ eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type visionos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
+else
+ xcrun altool --validate-app -f "${IPA_PATH}" --type visionos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
+fi
+VALIDATION_RESULT=$?
+
+# Final validation result
+FINAL_VALIDATION_RESULT=0
+
+# Check if validation failed because the app isn't in App Store Connect
+if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
+ echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
+ echo "This is expected for apps that haven't been registered in App Store Connect yet."
+ echo "This doesn't indicate a problem with the build or framework."
+
+ # Perform alternative validation
+ echo "Performing alternative validation checks..."
+
+ # Check if IPA was created successfully
+ if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
+ echo "✅ IPA file created successfully"
+ else
+ echo "❌ IPA file not created or empty"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if app binary exists and is executable
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
+ echo "✅ App binary exists and is executable"
+ else
+ echo "❌ App binary missing or not executable"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework was properly embedded
+ if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
+ echo "✅ llama.framework properly embedded"
+ else
+ echo "❌ llama.framework not properly embedded"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ # Check if framework binary exists
+ if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
+ echo "✅ Framework binary exists"
+
+ # Further validate framework by checking architecture
+ ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
+ if [ -n "$ARCHS" ]; then
+ echo "✅ Framework architecture(s): $ARCHS"
+ else
+ echo "⚠️ Could not determine framework architecture"
+ fi
+ else
+ echo "❌ Framework binary missing"
+ FINAL_VALIDATION_RESULT=1
+ fi
+
+ if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
+ else
+ echo "❌ Alternative validation FAILED: Issues found with the app or framework"
+ fi
+elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
+ echo "✅ visionOS Validation PASSED: IPA successfully validated"
+ echo "Results saved to ${VALIDATION_OUTPUT}"
+else
+ echo "❌ visionOS Validation FAILED: IPA validation found issues"
+ echo "See validation output at ${VALIDATION_OUTPUT}"
+ echo ""
+ echo "==== VALIDATION ERRORS ===="
+
+ # Try to extract specific errors from the output
+ if grep -q "Error" "${VALIDATION_OUTPUT}"; then
+ grep -A 5 "Error" "${VALIDATION_OUTPUT}"
+ else
+ # If no specific error found, show the whole log
+ cat "${VALIDATION_OUTPUT}"
+ fi
+
+ # Additional debugging: check IPA contents
+ echo ""
+ echo "==== IPA CONTENTS ===="
+ mkdir -p "${TEMP_DIR}/ipa_contents"
+ unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
+
+ # Check for code signing issues
+ echo ""
+ echo "==== CODE SIGNING INFO ===="
+ codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
+
+ # Check embedded frameworks
+ echo ""
+ echo "==== FRAMEWORK INFO ===="
+ ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
+fi
+
+# Don't clean up on error to allow inspection
+if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
+ echo ""
+ echo "Temporary files kept for inspection at: ${TEMP_DIR}"
+ echo "===== visionOS Validation Process Failed ====="
+ exit 1
+fi
+
+# Clean up temporary files but keep build artifacts
+if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
+ echo "Cleaning up temporary files..."
+ #rm -rf "${TEMP_DIR}"
+fi
+
+echo "===== visionOS Validation Process Completed ====="
+exit $FINAL_VALIDATION_RESULT
diff --git a/llama.cpp/scripts/bench-models.sh b/llama.cpp/scripts/bench-models.sh
new file mode 100755
index 0000000..c241013
--- /dev/null
+++ b/llama.cpp/scripts/bench-models.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+
+RESULTS="bench-models-results.txt"
+: > "$RESULTS"
+
+ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
+ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"
+
+QUICK=0
+DIO=0
+while (( "$#" )); do
+ case "$1" in
+ --quick) QUICK=1; shift ;;
+ --dio) DIO=1; shift ;;
+ *) shift ;;
+ esac
+done
+
+if (( QUICK )); then
+ ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
+ ARGS_B="-d 0 -p 2048 -n 32"
+fi
+
+if (( DIO )); then
+ ARGS_BB="${ARGS_BB} --no-mmap --direct-io"
+ ARGS_B="${ARGS_B} -mmp 0 -dio 1"
+fi
+
+run_model() {
+ local HFR=$1
+ local HFF=$2
+
+ printf "## ${HFR}\n" | tee -a "$RESULTS"
+ printf "\n" | tee -a "$RESULTS"
+ printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
+ printf "\n" | tee -a "$RESULTS"
+
+ printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
+ printf "\n" | tee -a "$RESULTS"
+
+ ./bin/llama-batched-bench \
+ -hfr "${HFR}" -hff "${HFF}" \
+ -m "${HFF}" -fa 1 -ub 2048 \
+ ${ARGS_BB} | tee -a "$RESULTS"
+
+ printf "\n" | tee -a "$RESULTS"
+
+ printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
+ printf "\n" | tee -a "$RESULTS"
+
+ ./bin/llama-bench \
+ -m "${HFF}" -fa 1 -ub 2048 \
+ ${ARGS_B} | tee -a "$RESULTS"
+
+ printf "\n" | tee -a "$RESULTS"
+
+ printf "\n"
+}
+
+run_model "ggml-org/gpt-oss-20b-GGUF" "gpt-oss-20b-mxfp4.gguf"
+run_model "ggml-org/gpt-oss-120b-GGUF" "gpt-oss-120b-mxfp4-00001-of-00003.gguf"
+run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
+run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF" "qwen2.5-coder-7b-q8_0.gguf"
+run_model "ggml-org/gemma-3-4b-it-qat-GGUF" "gemma-3-4b-it-qat-Q4_0.gguf"
+run_model "ggml-org/GLM-4.7-Flash-GGUF" "GLM-4.7-Flash-Q8_0.gguf"
+
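+# Extra models can be listed in models-extra.txt, one "<hf-repo> <gguf-file>" pair per line,
+# e.g. (hypothetical): ggml-org/SomeModel-GGUF somemodel-q8_0.gguf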
+if [[ -f models-extra.txt ]]; then
+ while read -r HFR HFF; do
+ [[ -z "$HFR" ]] && continue
+ run_model "$HFR" "$HFF"
+ done < models-extra.txt
+fi
+
+printf "\n=====================================\n"
+printf "\n"
+
+cat "$RESULTS"
+
+printf "\n"
+printf "Done! Results are written to $RESULTS\n"
+printf "\n"
+
diff --git a/llama.cpp/scripts/build-info.sh b/llama.cpp/scripts/build-info.sh
new file mode 100755
index 0000000..fa9e7ba
--- /dev/null
+++ b/llama.cpp/scripts/build-info.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+CC=$1
+
+build_number="0"
+build_commit="unknown"
+build_compiler="unknown"
+build_target="unknown"
+
+if out=$(git rev-list --count HEAD); then
+ # git is broken on WSL so we need to strip extra newlines
+ build_number=$(printf '%s' "$out" | tr -d '\n')
+fi
+
+if out=$(git rev-parse --short HEAD); then
+ build_commit=$(printf '%s' "$out" | tr -d '\n')
+fi
+
+if out=$($CC --version | head -1); then
+ build_compiler=$out
+fi
+
+if out=$($CC -dumpmachine); then
+ build_target=$out
+fi
+
+echo "int LLAMA_BUILD_NUMBER = ${build_number};"
+echo "char const *LLAMA_COMMIT = \"${build_commit}\";"
+echo "char const *LLAMA_COMPILER = \"${build_compiler}\";"
+echo "char const *LLAMA_BUILD_TARGET = \"${build_target}\";"
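+
+# The generated output looks roughly like this (values are illustrative):
+#   int LLAMA_BUILD_NUMBER = 1234;
+#   char const *LLAMA_COMMIT = "abcdef01";
+#   char const *LLAMA_COMPILER = "cc (GCC) 13.2.0";
+#   char const *LLAMA_BUILD_TARGET = "x86_64-pc-linux-gnu";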
diff --git a/llama.cpp/scripts/check-requirements.sh b/llama.cpp/scripts/check-requirements.sh
new file mode 100755
index 0000000..da2357d
--- /dev/null
+++ b/llama.cpp/scripts/check-requirements.sh
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+#
+# check-requirements.sh checks all requirements files for each top-level
+# convert*.py script.
+#
+# WARNING: This is quite IO intensive, because a fresh venv is set up for every
+# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
+# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
+#
+# usage: check-requirements.sh [<working_dir>]
+# check-requirements.sh nocleanup [<working_dir>]
+#
+# where:
+# - <working_dir> is a directory that can be used as the base for
+# setting up the venvs. Defaults to `/tmp`.
+# - 'nocleanup' as the first argument will disable automatic cleanup
+# of the files created by this script.
+#
+# requires:
+# - bash >= 3.2.57
+# - shellcheck
+#
+# For each script, it creates a fresh venv, `pip install`s the requirements, and
+# finally imports the python script to check for `ImportError`.
+#
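+# Roughly, for a hypothetical convert_foo.py, the per-script check amounts to:
+#
+#   python3 -m venv /tmp/check-requirements.XXXX/convert_foo-venv
+#   source /tmp/check-requirements.XXXX/convert_foo-venv/bin/activate
+#   pip install -qr requirements/requirements-convert_foo.txt
+#   python -c 'from importlib.machinery import SourceFileLoader; SourceFileLoader("convert_foo", "convert_foo.py").load_module()'
+#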
+
+log() {
+ local level=$1 msg=$2
+ printf >&2 '%s: %s\n' "$level" "$msg"
+}
+
+debug() {
+ log DEBUG "$@"
+}
+
+info() {
+ log INFO "$@"
+}
+
+fatal() {
+ log FATAL "$@"
+ exit 1
+}
+
+cleanup() {
+ if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then
+ info "Removing $workdir"
+ local count=0
+ rm -rfv -- "$workdir" | while read -r; do
+ if (( count++ > 750 )); then
+ printf .
+ count=0
+ fi
+ done
+ printf '\n'
+ info "Removed $workdir"
+ fi
+}
+
+do_cleanup=1
+if [[ ${1-} == nocleanup ]]; then
+ do_cleanup=0; shift
+fi
+
+if (( do_cleanup )); then
+ trap exit INT TERM
+ trap cleanup EXIT
+fi
+
+this=$(realpath -- "$0"); readonly this
+cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory
+
+shellcheck "$this"
+
+readonly reqs_dir=requirements
+
+if [[ ${1+x} ]]; then
+ tmp_dir=$(realpath -- "$1")
+ if [[ ! ( -d $tmp_dir && -w $tmp_dir ) ]]; then
+ fatal "$tmp_dir is not a writable directory"
+ fi
+else
+ tmp_dir=/tmp
+fi
+
+workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir
+info "Working directory: $workdir"
+
+check_requirements() {
+ local reqs=$1
+
+ info "$reqs: beginning check"
+ pip --disable-pip-version-check install -qr "$reqs"
+ info "$reqs: OK"
+}
+
+check_convert_script() {
+ local py=$1 # e.g. ./convert_hf_to_gguf.py
+ local pyname=${py##*/} # e.g. convert_hf_to_gguf.py
+ pyname=${pyname%.py} # e.g. convert_hf_to_gguf
+
+ info "$py: beginning check"
+
+ local reqs="$reqs_dir/requirements-$pyname.txt"
+ if [[ ! -r $reqs ]]; then
+ fatal "$py missing requirements. Expected: $reqs"
+ fi
+
+ # Check that all sub-requirements are added to top-level requirements.txt
+ if ! grep -qF "$reqs" requirements.txt; then
+ fatal "$reqs needs to be added to requirements.txt"
+ fi
+
+ local venv="$workdir/$pyname-venv"
+ python3 -m venv "$venv"
+
+ (
+ # shellcheck source=/dev/null
+ source "$venv/bin/activate"
+
+ check_requirements "$reqs"
+
+ python - "$py" "$pyname" <<'EOF'
+import sys
+from importlib.machinery import SourceFileLoader
+py, pyname = sys.argv[1:]
+SourceFileLoader(pyname, py).load_module()
+EOF
+ )
+
+ if (( do_cleanup )); then
+ rm -rf -- "$venv"
+ fi
+
+ info "$py: imports OK"
+}
+
+readonly ignore_eq_eq='check_requirements: ignore "=="'
+
+for req in */**/requirements*.txt; do
+ # Make sure exact release versions aren't being pinned in the requirements
+ # Filters out the ignore string
+ if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
+ tab=$'\t'
+ cat >&2 <<EOF
+FATAL: Avoid pinning exact package versions. Use '~=' instead.
+You can suppress this error by appending the following to the line:
+$tab# $ignore_eq_eq
+EOF
+ exit 1
+ fi
+done
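+
+# Hypothetical example: a line that intentionally pins an exact version can suppress
+# the error above by carrying the ignore marker as a trailing comment, e.g.
+#
+#   some-package==1.2.3  # check_requirements: ignore "=="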
+
+all_venv="$workdir/all-venv"
+python3 -m venv "$all_venv"
+
+(
+ # shellcheck source=/dev/null
+ source "$all_venv/bin/activate"
+ check_requirements requirements.txt
+)
+
+if (( do_cleanup )); then
+ rm -rf -- "$all_venv"
+fi
+
+check_convert_script examples/convert_legacy_llama.py
+for py in convert_*.py; do
+ # skip convert_hf_to_gguf_update.py
+ # TODO: the check is failing for some reason:
+ # https://github.com/ggml-org/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920
+ [[ $py == convert_hf_to_gguf_update.py ]] && continue
+
+ check_convert_script "$py"
+done
+
+info 'Done! No issues found.'
diff --git a/llama.cpp/scripts/compare-commits.sh b/llama.cpp/scripts/compare-commits.sh
new file mode 100755
index 0000000..1802d6e
--- /dev/null
+++ b/llama.cpp/scripts/compare-commits.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+
+if [ $# -lt 2 ]; then
+ echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [tool] [additional arguments]"
+ echo " tool: 'llama-bench' (default) or 'test-backend-ops'"
+ echo " additional arguments: passed to the selected tool"
+ exit 1
+fi
+
+set -e
+set -x
+
+# Parse arguments
+commit1=$1
+commit2=$2
+tool=${3:-llama-bench}
+additional_args="${@:4}"
+
+# Validate tool argument
+if [ "$tool" != "llama-bench" ] && [ "$tool" != "test-backend-ops" ]; then
+ echo "Error: tool must be 'llama-bench' or 'test-backend-ops'"
+ exit 1
+fi
+
+# verify at the start that the compare script has all the necessary dependencies installed
+./scripts/compare-llama-bench.py --check
+
+if ! command -v sqlite3 >/dev/null 2>&1; then
+ echo "Error: sqlite3 is not installed or not in PATH"
+ echo "Please install sqlite3 to use this script"
+ exit 1
+fi
+
+if [ "$tool" = "llama-bench" ]; then
+ db_file="llama-bench.sqlite"
+ target="llama-bench"
+ run_args="-o sql -oe md $additional_args"
+else # test-backend-ops
+ db_file="test-backend-ops.sqlite"
+ target="test-backend-ops"
+ run_args="perf --output sql $additional_args"
+fi
+
+rm -f "$db_file" > /dev/null
+
+# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits.sh ...)
+if [ -n "$GGML_CUDA" ]; then
+ CMAKE_OPTS="${CMAKE_OPTS} -DGGML_CUDA=ON"
+fi
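+
+# Illustrative invocation (branch name and model path are placeholders); extra
+# arguments after the tool name are forwarded to the selected tool:
+#
+#   GGML_CUDA=1 ./scripts/compare-commits.sh master my-branch llama-bench -m models/model.gguf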
+
+dir="build-bench"
+
+function run {
+ rm -fr ${dir} > /dev/null
+ cmake -B ${dir} -S . ${CMAKE_OPTS} > /dev/null
+ cmake --build ${dir} -t $target -j $(nproc) > /dev/null
+ ${dir}/bin/$target $run_args | sqlite3 "$db_file"
+}
+
+git checkout $commit1 > /dev/null
+run
+
+git checkout $commit2 > /dev/null
+run
+
+./scripts/compare-llama-bench.py -b $commit1 -c $commit2 --tool $tool -i "$db_file"
diff --git a/llama.cpp/scripts/compare-llama-bench.py b/llama.cpp/scripts/compare-llama-bench.py
new file mode 100755
index 0000000..9541b89
--- /dev/null
+++ b/llama.cpp/scripts/compare-llama-bench.py
@@ -0,0 +1,1093 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import heapq
+import json
+import logging
+import os
+import sqlite3
+import sys
+from collections.abc import Iterator, Sequence
+from glob import glob
+from typing import Any, Optional, Union
+
+try:
+ import git
+ from tabulate import tabulate
+except ImportError as e:
+ print("the following Python libraries are required: GitPython, tabulate.") # noqa: NP100
+ raise e
+
+
+logger = logging.getLogger("compare-llama-bench")
+
+# All llama-bench SQL fields
+LLAMA_BENCH_DB_FIELDS = [
+ "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
+ "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
+ "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
+ "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides",
+ "use_mmap", "embeddings", "no_op_offload", "n_prompt", "n_gen", "n_depth",
+ "test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts", "n_cpu_moe"
+]
+
+LLAMA_BENCH_DB_TYPES = [
+ "TEXT", "INTEGER", "TEXT", "TEXT", "TEXT", "TEXT",
+ "TEXT", "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
+ "TEXT", "INTEGER", "INTEGER", "TEXT", "TEXT", "INTEGER",
+ "TEXT", "INTEGER", "INTEGER", "INTEGER", "TEXT", "TEXT",
+ "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER",
+ "TEXT", "INTEGER", "INTEGER", "REAL", "REAL", "INTEGER",
+]
+
+# All test-backend-ops SQL fields
+TEST_BACKEND_OPS_DB_FIELDS = [
+ "test_time", "build_commit", "backend_name", "op_name", "op_params", "test_mode",
+ "supported", "passed", "error_message", "time_us", "flops", "bandwidth_gb_s",
+ "memory_kb", "n_runs"
+]
+
+TEST_BACKEND_OPS_DB_TYPES = [
+ "TEXT", "TEXT", "TEXT", "TEXT", "TEXT", "TEXT",
+ "INTEGER", "INTEGER", "TEXT", "REAL", "REAL", "REAL",
+ "INTEGER", "INTEGER"
+]
+
+assert len(LLAMA_BENCH_DB_FIELDS) == len(LLAMA_BENCH_DB_TYPES)
+assert len(TEST_BACKEND_OPS_DB_FIELDS) == len(TEST_BACKEND_OPS_DB_TYPES)
+
+# Properties by which to differentiate results per commit for llama-bench:
+LLAMA_BENCH_KEY_PROPERTIES = [
+ "cpu_info", "gpu_info", "backends", "n_gpu_layers", "n_cpu_moe", "tensor_buft_overrides", "model_filename", "model_type",
+ "n_batch", "n_ubatch", "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v",
+ "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth"
+]
+
+# Properties by which to differentiate results per commit for test-backend-ops:
+TEST_BACKEND_OPS_KEY_PROPERTIES = [
+ "backend_name", "op_name", "op_params", "test_mode"
+]
+
+# Properties that are boolean and are converted to Yes/No for the table:
+LLAMA_BENCH_BOOL_PROPERTIES = ["embeddings", "cpu_strict", "use_mmap", "no_kv_offload", "flash_attn"]
+TEST_BACKEND_OPS_BOOL_PROPERTIES = ["supported", "passed"]
+
+# Header names for the table (llama-bench):
+LLAMA_BENCH_PRETTY_NAMES = {
+ "cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers",
+ "tensor_buft_overrides": "Tensor overrides", "model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]",
+ "model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size", "embeddings": "Embeddings",
+ "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
+ "use_mmap": "Use mmap", "no_kv_offload": "NKVO", "split_mode": "Split mode", "main_gpu": "Main GPU", "tensor_split": "Tensor split",
+ "flash_attn": "FlashAttention",
+}
+
+# Header names for the table (test-backend-ops):
+TEST_BACKEND_OPS_PRETTY_NAMES = {
+ "backend_name": "Backend", "op_name": "GGML op", "op_params": "Op parameters", "test_mode": "Mode",
+ "supported": "Supported", "passed": "Passed", "error_message": "Error",
+ "flops": "FLOPS", "bandwidth_gb_s": "Bandwidth (GB/s)", "memory_kb": "Memory (KB)", "n_runs": "Runs"
+}
+
+DEFAULT_SHOW_LLAMA_BENCH = ["model_type"] # Always show these properties by default.
+DEFAULT_HIDE_LLAMA_BENCH = ["model_filename"] # Always hide these properties by default.
+
+DEFAULT_SHOW_TEST_BACKEND_OPS = ["backend_name", "op_name"] # Always show these properties by default.
+DEFAULT_HIDE_TEST_BACKEND_OPS = ["error_message"] # Always hide these properties by default.
+
+GPU_NAME_STRIP = ["NVIDIA GeForce ", "Tesla ", "AMD Radeon ", "AMD Instinct "] # Strip prefixes for smaller tables.
+MODEL_SUFFIX_REPLACE = {" - Small": "_S", " - Medium": "_M", " - Large": "_L"}
+
+DESCRIPTION = """Creates tables from llama-bench or test-backend-ops data written to multiple JSON/CSV files, a single JSONL file or SQLite database. Example usage (Linux):
+
+For llama-bench:
+$ git checkout master
+$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc)
+$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
+$ git checkout some_branch
+$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc)
+$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
+$ ./scripts/compare-llama-bench.py
+
+For test-backend-ops:
+$ git checkout master
+$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc)
+$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite
+$ git checkout some_branch
+$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc)
+$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite
+$ ./scripts/compare-llama-bench.py --tool test-backend-ops -i test-backend-ops.sqlite
+
+Performance numbers from multiple runs per commit are averaged WITHOUT being weighted by the --repetitions parameter of llama-bench.
+"""
+
+parser = argparse.ArgumentParser(
+ description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter)
+help_b = (
+ "The baseline commit to compare performance to. "
+ "Accepts either a branch name, tag name, or commit hash. "
+ "Defaults to latest master commit with data."
+)
+parser.add_argument("-b", "--baseline", help=help_b)
+help_c = (
+ "The commit whose performance is to be compared to the baseline. "
+ "Accepts either a branch name, tag name, or commit hash. "
+ "Defaults to the non-master commit for which llama-bench was run most recently."
+)
+parser.add_argument("-c", "--compare", help=help_c)
+help_t = (
+ "The tool whose data is being compared. "
+ "Either 'llama-bench' or 'test-backend-ops'. "
+ "This determines the database schema and comparison logic used. "
+ "If left unspecified, try to determine from the input file."
+)
+parser.add_argument("-t", "--tool", help=help_t, default=None, choices=[None, "llama-bench", "test-backend-ops"])
+help_i = (
+ "JSON/JSONL/SQLite/CSV files for comparing commits. "
+ "Specify multiple times to use multiple input files (JSON/CSV only). "
+ "Defaults to 'llama-bench.sqlite' in the current working directory. "
+ "If no such file is found and there is exactly one .sqlite file in the current directory, "
+ "that file is instead used as input."
+)
+parser.add_argument("-i", "--input", action="append", help=help_i)
+help_o = (
+ "Output format for the table. "
+ "Defaults to 'pipe' (GitHub compatible). "
+ "Also supports e.g. 'latex' or 'mediawiki'. "
+ "See tabulate documentation for full list."
+)
+parser.add_argument("-o", "--output", help=help_o, default="pipe")
+help_s = (
+ "Columns to add to the table. "
+ "Accepts a comma-separated list of values. "
+ f"Legal values for test-backend-ops: {', '.join(TEST_BACKEND_OPS_KEY_PROPERTIES)}. "
+ f"Legal values for llama-bench: {', '.join(LLAMA_BENCH_KEY_PROPERTIES[:-3])}. "
+ "Defaults to model name (model_type) and CPU and/or GPU name (cpu_info, gpu_info) "
+ "plus any column where not all data points are the same. "
+ "If the columns are manually specified, then the results for each unique combination of the "
+ "specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
+)
+parser.add_argument("--check", action="store_true", help="check if all required Python libraries are installed")
+parser.add_argument("-s", "--show", help=help_s)
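+# Illustrative --show usage (column names come from LLAMA_BENCH_KEY_PROPERTIES):
+#   ./scripts/compare-llama-bench.py -s gpu_info,n_threads,flash_attn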
+parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+parser.add_argument("--plot", help="generate a performance comparison plot and save to specified file (e.g., plot.png)")
+parser.add_argument("--plot_x", help="parameter to use as x axis for plotting (default: n_depth)", default="n_depth")
+parser.add_argument("--plot_log_scale", action="store_true", help="use log scale for x axis in plots (off by default)")
+
+known_args, unknown_args = parser.parse_known_args()
+
+logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO)
+
+
+if known_args.check:
+ # Check if all required Python libraries are installed. Would have failed earlier if not.
+ sys.exit(0)
+
+if unknown_args:
+ logger.error(f"Received unknown args: {unknown_args}.\n")
+ parser.print_help()
+ sys.exit(1)
+
+input_file = known_args.input
+tool = known_args.tool
+
+if not input_file:
+ if tool == "llama-bench" and os.path.exists("./llama-bench.sqlite"):
+ input_file = ["llama-bench.sqlite"]
+ elif tool == "test-backend-ops" and os.path.exists("./test-backend-ops.sqlite"):
+ input_file = ["test-backend-ops.sqlite"]
+
+if not input_file:
+ sqlite_files = glob("*.sqlite")
+ if len(sqlite_files) == 1:
+ input_file = sqlite_files
+
+if not input_file:
+ logger.error("Cannot find a suitable input file, please provide one.\n")
+ parser.print_help()
+ sys.exit(1)
+
+
+class LlamaBenchData:
+ repo: Optional[git.Repo]
+ build_len_min: int
+ build_len_max: int
+ build_len: int = 8
+ builds: list[str] = []
+ tool: str = "llama-bench" # Tool type: "llama-bench" or "test-backend-ops"
+
+ def __init__(self, tool: str = "llama-bench"):
+ self.tool = tool
+ try:
+ self.repo = git.Repo(".", search_parent_directories=True)
+ except git.InvalidGitRepositoryError:
+ self.repo = None
+
+ # Set schema-specific properties based on tool
+ if self.tool == "llama-bench":
+ self.check_keys = set(LLAMA_BENCH_KEY_PROPERTIES + ["build_commit", "test_time", "avg_ts"])
+ elif self.tool == "test-backend-ops":
+ self.check_keys = set(TEST_BACKEND_OPS_KEY_PROPERTIES + ["build_commit", "test_time"])
+ else:
+ assert False
+
+ def _builds_init(self):
+ self.build_len = self.build_len_min
+
+ def _check_keys(self, keys: set) -> Optional[set]:
+ """Private helper method that checks against required data keys and returns missing ones."""
+ if not keys >= self.check_keys:
+ return self.check_keys - keys
+ return None
+
+ def find_parent_in_data(self, commit: git.Commit) -> Optional[str]:
+ """Helper method to find the most recent parent measured in number of commits for which there is data."""
+ heap: list[tuple[int, git.Commit]] = [(0, commit)]
+ seen_hexsha8 = set()
+ while heap:
+ depth, current_commit = heapq.heappop(heap)
+            current_hexsha8 = current_commit.hexsha[:self.build_len]
+            if current_hexsha8 in self.builds:
+                return current_hexsha8
+            for parent in current_commit.parents:
+ parent_hexsha8 = parent.hexsha[:self.build_len]
+ if parent_hexsha8 not in seen_hexsha8:
+ seen_hexsha8.add(parent_hexsha8)
+ heapq.heappush(heap, (depth + 1, parent))
+ return None
+
+ def get_all_parent_hexsha8s(self, commit: git.Commit) -> Sequence[str]:
+ """Helper method to recursively get hexsha8 values for all parents of a commit."""
+ unvisited = [commit]
+ visited = []
+
+ while unvisited:
+ current_commit = unvisited.pop(0)
+ visited.append(current_commit.hexsha[:self.build_len])
+ for parent in current_commit.parents:
+ if parent.hexsha[:self.build_len] not in visited:
+ unvisited.append(parent)
+
+ return visited
+
+ def get_commit_name(self, hexsha8: str) -> str:
+ """Helper method to find a human-readable name for a commit if possible."""
+ if self.repo is None:
+ return hexsha8
+ for h in self.repo.heads:
+ if h.commit.hexsha[:self.build_len] == hexsha8:
+ return h.name
+ for t in self.repo.tags:
+ if t.commit.hexsha[:self.build_len] == hexsha8:
+ return t.name
+ return hexsha8
+
+ def get_commit_hexsha8(self, name: str) -> Optional[str]:
+ """Helper method to search for a commit given a human-readable name."""
+ if self.repo is None:
+ return None
+ for h in self.repo.heads:
+ if h.name == name:
+ return h.commit.hexsha[:self.build_len]
+ for t in self.repo.tags:
+ if t.name == name:
+ return t.commit.hexsha[:self.build_len]
+ for c in self.repo.iter_commits("--all"):
+ if c.hexsha[:self.build_len] == name[:self.build_len]:
+ return c.hexsha[:self.build_len]
+ return None
+
+ def builds_timestamp(self, reverse: bool = False) -> Union[Iterator[tuple], Sequence[tuple]]:
+ """Helper method that gets rows of (build_commit, test_time) sorted by the latter."""
+ return []
+
+ def get_rows(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]:
+ """
+ Helper method that gets table rows for some list of properties.
+ Rows are created by combining those where all provided properties are equal.
+ The resulting rows are then grouped by the provided properties and the t/s values are averaged.
+ The returned rows are unique in terms of property combinations.
+ """
+ return []
+
+
+class LlamaBenchDataSQLite3(LlamaBenchData):
+ connection: Optional[sqlite3.Connection] = None
+ cursor: sqlite3.Cursor
+ table_name: str
+
+ def __init__(self, tool: str = "llama-bench"):
+ super().__init__(tool)
+ if self.connection is None:
+ self.connection = sqlite3.connect(":memory:")
+ self.cursor = self.connection.cursor()
+
+ # Set table name and schema based on tool
+ if self.tool == "llama-bench":
+ self.table_name = "llama_bench"
+ db_fields = LLAMA_BENCH_DB_FIELDS
+ db_types = LLAMA_BENCH_DB_TYPES
+ elif self.tool == "test-backend-ops":
+ self.table_name = "test_backend_ops"
+ db_fields = TEST_BACKEND_OPS_DB_FIELDS
+ db_types = TEST_BACKEND_OPS_DB_TYPES
+ else:
+ assert False
+
+ self.cursor.execute(f"CREATE TABLE {self.table_name}({', '.join(' '.join(x) for x in zip(db_fields, db_types))});")
+
+ def _builds_init(self):
+ if self.connection:
+ self.build_len_min = self.cursor.execute(f"SELECT MIN(LENGTH(build_commit)) from {self.table_name};").fetchone()[0]
+ self.build_len_max = self.cursor.execute(f"SELECT MAX(LENGTH(build_commit)) from {self.table_name};").fetchone()[0]
+
+ if self.build_len_min != self.build_len_max:
+                logger.warning("Data contains commit hashes of differing lengths. It's possible that the wrong commits will be compared. "
+                               "Try purging the database of old commits.")
+ self.cursor.execute(f"UPDATE {self.table_name} SET build_commit = SUBSTRING(build_commit, 1, {self.build_len_min});")
+
+ builds = self.cursor.execute(f"SELECT DISTINCT build_commit FROM {self.table_name};").fetchall()
+ self.builds = list(map(lambda b: b[0], builds)) # list[tuple[str]] -> list[str]
+ super()._builds_init()
+
+ def builds_timestamp(self, reverse: bool = False) -> Union[Iterator[tuple], Sequence[tuple]]:
+ data = self.cursor.execute(
+ f"SELECT build_commit, test_time FROM {self.table_name} ORDER BY test_time;").fetchall()
+ return reversed(data) if reverse else data
+
+ def get_rows(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]:
+ if self.tool == "llama-bench":
+ return self._get_rows_llama_bench(properties, hexsha8_baseline, hexsha8_compare)
+ elif self.tool == "test-backend-ops":
+ return self._get_rows_test_backend_ops(properties, hexsha8_baseline, hexsha8_compare)
+ else:
+ assert False
+
+ def _get_rows_llama_bench(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]:
+ select_string = ", ".join(
+ [f"tb.{p}" for p in properties] + ["tb.n_prompt", "tb.n_gen", "tb.n_depth", "AVG(tb.avg_ts)", "AVG(tc.avg_ts)"])
+ equal_string = " AND ".join(
+ [f"tb.{p} = tc.{p}" for p in LLAMA_BENCH_KEY_PROPERTIES] + [
+ f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'"]
+ )
+ group_order_string = ", ".join([f"tb.{p}" for p in properties] + ["tb.n_gen", "tb.n_prompt", "tb.n_depth"])
+ query = (f"SELECT {select_string} FROM {self.table_name} tb JOIN {self.table_name} tc ON {equal_string} "
+ f"GROUP BY {group_order_string} ORDER BY {group_order_string};")
+ return self.cursor.execute(query).fetchall()
+
+ def _get_rows_test_backend_ops(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]:
+ # For test-backend-ops, we compare FLOPS and bandwidth metrics (prioritizing FLOPS over bandwidth)
+ select_string = ", ".join(
+ [f"tb.{p}" for p in properties] + [
+ "AVG(tb.flops)", "AVG(tc.flops)",
+ "AVG(tb.bandwidth_gb_s)", "AVG(tc.bandwidth_gb_s)"
+ ])
+ equal_string = " AND ".join(
+ [f"tb.{p} = tc.{p}" for p in TEST_BACKEND_OPS_KEY_PROPERTIES] + [
+ f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'",
+ "tb.supported = 1", "tc.supported = 1", "tb.passed = 1", "tc.passed = 1"] # Only compare successful tests
+ )
+ group_order_string = ", ".join([f"tb.{p}" for p in properties])
+ query = (f"SELECT {select_string} FROM {self.table_name} tb JOIN {self.table_name} tc ON {equal_string} "
+ f"GROUP BY {group_order_string} ORDER BY {group_order_string};")
+ return self.cursor.execute(query).fetchall()
+
+
+class LlamaBenchDataSQLite3File(LlamaBenchDataSQLite3):
+ def __init__(self, data_file: str, tool: Any):
+ self.connection = sqlite3.connect(data_file)
+ self.cursor = self.connection.cursor()
+
+ # Check which table exists in the database
+ tables = self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
+ table_names = [table[0] for table in tables]
+
+ # Tool selection logic
+ if tool is None:
+ if "llama_bench" in table_names:
+ self.table_name = "llama_bench"
+ tool = "llama-bench"
+ elif "test_backend_ops" in table_names:
+ self.table_name = "test_backend_ops"
+ tool = "test-backend-ops"
+ else:
+ raise RuntimeError(f"No suitable table found in database. Available tables: {table_names}")
+ elif tool == "llama-bench":
+ if "llama_bench" in table_names:
+ self.table_name = "llama_bench"
+ tool = "llama-bench"
+ else:
+                raise RuntimeError(f"Table 'llama_bench' not found for tool 'llama-bench'. Available tables: {table_names}")
+ elif tool == "test-backend-ops":
+ if "test_backend_ops" in table_names:
+ self.table_name = "test_backend_ops"
+ tool = "test-backend-ops"
+ else:
+ raise RuntimeError(f"Table 'test_backend_ops' not found for tool 'test-backend-ops'. Available tables: {table_names}")
+ else:
+ raise RuntimeError(f"Unknown tool: {tool}")
+
+ super().__init__(tool)
+ self._builds_init()
+
+ @staticmethod
+ def valid_format(data_file: str) -> bool:
+ connection = sqlite3.connect(data_file)
+ cursor = connection.cursor()
+
+ try:
+ if cursor.execute("PRAGMA schema_version;").fetchone()[0] == 0:
+ raise sqlite3.DatabaseError("The provided input file does not exist or is empty.")
+ except sqlite3.DatabaseError as e:
+ logger.debug(f'"{data_file}" is not a valid SQLite3 file.', exc_info=e)
+ cursor = None
+
+ connection.close()
+ return True if cursor else False
+
+
+class LlamaBenchDataJSONL(LlamaBenchDataSQLite3):
+ def __init__(self, data_file: str, tool: str = "llama-bench"):
+ super().__init__(tool)
+
+ # Get the appropriate field list based on tool
+ db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS
+
+ with open(data_file, "r", encoding="utf-8") as fp:
+ for i, line in enumerate(fp):
+ parsed = json.loads(line)
+
+ for k in parsed.keys() - set(db_fields):
+ del parsed[k]
+
+ if (missing_keys := self._check_keys(parsed.keys())):
+ raise RuntimeError(f"Missing required data key(s) at line {i + 1}: {', '.join(missing_keys)}")
+
+ self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(parsed.keys())}) VALUES({', '.join('?' * len(parsed))});", tuple(parsed.values()))
+
+ self._builds_init()
+
+ @staticmethod
+ def valid_format(data_file: str) -> bool:
+ try:
+ with open(data_file, "r", encoding="utf-8") as fp:
+ for line in fp:
+ json.loads(line)
+ break
+ except Exception as e:
+ logger.debug(f'"{data_file}" is not a valid JSONL file.', exc_info=e)
+ return False
+
+ return True
+
+
+class LlamaBenchDataJSON(LlamaBenchDataSQLite3):
+ def __init__(self, data_files: list[str], tool: str = "llama-bench"):
+ super().__init__(tool)
+
+ # Get the appropriate field list based on tool
+ db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS
+
+ for data_file in data_files:
+ with open(data_file, "r", encoding="utf-8") as fp:
+ parsed = json.load(fp)
+
+ for i, entry in enumerate(parsed):
+ for k in entry.keys() - set(db_fields):
+ del entry[k]
+
+ if (missing_keys := self._check_keys(entry.keys())):
+ raise RuntimeError(f"Missing required data key(s) at entry {i + 1}: {', '.join(missing_keys)}")
+
+ self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(entry.keys())}) VALUES({', '.join('?' * len(entry))});", tuple(entry.values()))
+
+ self._builds_init()
+
+ @staticmethod
+ def valid_format(data_files: list[str]) -> bool:
+ if not data_files:
+ return False
+
+ for data_file in data_files:
+ try:
+ with open(data_file, "r", encoding="utf-8") as fp:
+ json.load(fp)
+ except Exception as e:
+ logger.debug(f'"{data_file}" is not a valid JSON file.', exc_info=e)
+ return False
+
+ return True
+
+
+class LlamaBenchDataCSV(LlamaBenchDataSQLite3):
+ def __init__(self, data_files: list[str], tool: str = "llama-bench"):
+ super().__init__(tool)
+
+ # Get the appropriate field list based on tool
+ db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS
+
+ for data_file in data_files:
+ with open(data_file, "r", encoding="utf-8") as fp:
+ for i, parsed in enumerate(csv.DictReader(fp)):
+ keys = set(parsed.keys())
+
+ for k in keys - set(db_fields):
+ del parsed[k]
+
+ if (missing_keys := self._check_keys(keys)):
+ raise RuntimeError(f"Missing required data key(s) at line {i + 1}: {', '.join(missing_keys)}")
+
+ self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(parsed.keys())}) VALUES({', '.join('?' * len(parsed))});", tuple(parsed.values()))
+
+ self._builds_init()
+
+ @staticmethod
+ def valid_format(data_files: list[str]) -> bool:
+ if not data_files:
+ return False
+
+ for data_file in data_files:
+ try:
+ with open(data_file, "r", encoding="utf-8") as fp:
+ for parsed in csv.DictReader(fp):
+ break
+ except Exception as e:
+ logger.debug(f'"{data_file}" is not a valid CSV file.', exc_info=e)
+ return False
+
+ return True
+
+
+def format_flops(flops_value: float) -> str:
+ """Format FLOPS values with appropriate units for better readability."""
+ if flops_value == 0:
+ return "0.00"
+
+ # Define unit thresholds and names
+ units = [
+ (1e12, "T"), # TeraFLOPS
+ (1e9, "G"), # GigaFLOPS
+ (1e6, "M"), # MegaFLOPS
+ (1e3, "k"), # kiloFLOPS
+ (1, "") # FLOPS
+ ]
+
+ for threshold, unit in units:
+ if abs(flops_value) >= threshold:
+ formatted_value = flops_value / threshold
+ if formatted_value >= 100:
+ return f"{formatted_value:.1f}{unit}"
+ else:
+ return f"{formatted_value:.2f}{unit}"
+
+ # Fallback for very small values
+ return f"{flops_value:.2f}"
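+# Example values (illustrative): format_flops(1.23e9) returns "1.23G" and
+# format_flops(2.5e14) returns "250.0T".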
+
+
+def format_flops_for_table(flops_value: float, target_unit: str) -> str:
+ """Format FLOPS values for table display without unit suffix (since unit is in header)."""
+ if flops_value == 0:
+ return "0.00"
+
+ # Define unit thresholds based on target unit
+ unit_divisors = {
+ "TFLOPS": 1e12,
+ "GFLOPS": 1e9,
+ "MFLOPS": 1e6,
+ "kFLOPS": 1e3,
+ "FLOPS": 1
+ }
+
+ divisor = unit_divisors.get(target_unit, 1)
+ formatted_value = flops_value / divisor
+
+ if formatted_value >= 100:
+ return f"{formatted_value:.1f}"
+ else:
+ return f"{formatted_value:.2f}"
+
+
+def get_flops_unit_name(flops_values: list) -> str:
+ """Determine the best FLOPS unit name based on the magnitude of values."""
+ if not flops_values or all(v == 0 for v in flops_values):
+ return "FLOPS"
+
+ # Find the maximum absolute value to determine appropriate unit
+ max_flops = max(abs(v) for v in flops_values if v != 0)
+
+ if max_flops >= 1e12:
+ return "TFLOPS"
+ elif max_flops >= 1e9:
+ return "GFLOPS"
+ elif max_flops >= 1e6:
+ return "MFLOPS"
+ elif max_flops >= 1e3:
+ return "kFLOPS"
+ else:
+ return "FLOPS"
+
+
+bench_data = None
+if len(input_file) == 1:
+ if LlamaBenchDataSQLite3File.valid_format(input_file[0]):
+ bench_data = LlamaBenchDataSQLite3File(input_file[0], tool)
+ elif LlamaBenchDataJSON.valid_format(input_file):
+ bench_data = LlamaBenchDataJSON(input_file, tool)
+ elif LlamaBenchDataJSONL.valid_format(input_file[0]):
+ bench_data = LlamaBenchDataJSONL(input_file[0], tool)
+ elif LlamaBenchDataCSV.valid_format(input_file):
+ bench_data = LlamaBenchDataCSV(input_file, tool)
+else:
+ if LlamaBenchDataJSON.valid_format(input_file):
+ bench_data = LlamaBenchDataJSON(input_file, tool)
+ elif LlamaBenchDataCSV.valid_format(input_file):
+ bench_data = LlamaBenchDataCSV(input_file, tool)
+
+if not bench_data:
+    raise RuntimeError("No valid input files found (or some of the provided files are invalid).")
+
+if not bench_data.builds:
+ raise RuntimeError(f"{input_file} does not contain any builds.")
+
+tool = bench_data.tool # May have chosen a default if tool was None.
+
+
+hexsha8_baseline = name_baseline = None
+
+# If the user specified a baseline, try to find a commit for it:
+if known_args.baseline is not None:
+ if known_args.baseline in bench_data.builds:
+ hexsha8_baseline = known_args.baseline
+ if hexsha8_baseline is None:
+ hexsha8_baseline = bench_data.get_commit_hexsha8(known_args.baseline)
+ name_baseline = known_args.baseline
+ if hexsha8_baseline is None:
+ logger.error(f"cannot find data for baseline={known_args.baseline}.")
+ sys.exit(1)
+# Otherwise, search for the most recent parent of master for which there is data:
+elif bench_data.repo is not None:
+ hexsha8_baseline = bench_data.find_parent_in_data(bench_data.repo.heads.master.commit)
+
+ if hexsha8_baseline is None:
+ logger.error("No baseline was provided and did not find data for any master branch commits.\n")
+ parser.print_help()
+ sys.exit(1)
+else:
+ logger.error("No baseline was provided and the current working directory "
+ "is not part of a git repository from which a baseline could be inferred.\n")
+ parser.print_help()
+ sys.exit(1)
+
+
+name_baseline = bench_data.get_commit_name(hexsha8_baseline)
+
+hexsha8_compare = name_compare = None
+
+# If the user has specified a compare value, try to find a corresponding commit:
+if known_args.compare is not None:
+ if known_args.compare in bench_data.builds:
+ hexsha8_compare = known_args.compare
+ if hexsha8_compare is None:
+ hexsha8_compare = bench_data.get_commit_hexsha8(known_args.compare)
+ name_compare = known_args.compare
+ if hexsha8_compare is None:
+ logger.error(f"cannot find data for compare={known_args.compare}.")
+ sys.exit(1)
+# Otherwise, search for the commit for which llama-bench was most recently run
+# and that is not a parent of master:
+elif bench_data.repo is not None:
+ hexsha8s_master = bench_data.get_all_parent_hexsha8s(bench_data.repo.heads.master.commit)
+ for (hexsha8, _) in bench_data.builds_timestamp(reverse=True):
+ if hexsha8 not in hexsha8s_master:
+ hexsha8_compare = hexsha8
+ break
+
+ if hexsha8_compare is None:
+ logger.error("No compare target was provided and did not find data for any non-master commits.\n")
+ parser.print_help()
+ sys.exit(1)
+else:
+ logger.error("No compare target was provided and the current working directory "
+ "is not part of a git repository from which a compare target could be inferred.\n")
+ parser.print_help()
+ sys.exit(1)
+
+name_compare = bench_data.get_commit_name(hexsha8_compare)
+
+# Get tool-specific configuration
+if tool == "llama-bench":
+ key_properties = LLAMA_BENCH_KEY_PROPERTIES
+ bool_properties = LLAMA_BENCH_BOOL_PROPERTIES
+ pretty_names = LLAMA_BENCH_PRETTY_NAMES
+ default_show = DEFAULT_SHOW_LLAMA_BENCH
+ default_hide = DEFAULT_HIDE_LLAMA_BENCH
+elif tool == "test-backend-ops":
+ key_properties = TEST_BACKEND_OPS_KEY_PROPERTIES
+ bool_properties = TEST_BACKEND_OPS_BOOL_PROPERTIES
+ pretty_names = TEST_BACKEND_OPS_PRETTY_NAMES
+ default_show = DEFAULT_SHOW_TEST_BACKEND_OPS
+ default_hide = DEFAULT_HIDE_TEST_BACKEND_OPS
+else:
+ assert False
+
+# If the user provided columns to group the results by, use them:
+if known_args.show is not None:
+ show = known_args.show.split(",")
+ unknown_cols = []
+ for prop in show:
+ valid_props = key_properties if tool == "test-backend-ops" else key_properties[:-3] # Exclude n_prompt, n_gen, n_depth for llama-bench
+ if prop not in valid_props:
+ unknown_cols.append(prop)
+ if unknown_cols:
+ logger.error(f"Unknown values for --show: {', '.join(unknown_cols)}")
+ parser.print_usage()
+ sys.exit(1)
+ rows_show = bench_data.get_rows(show, hexsha8_baseline, hexsha8_compare)
+# Otherwise, select those columns where the values are not all the same:
+else:
+ rows_full = bench_data.get_rows(key_properties, hexsha8_baseline, hexsha8_compare)
+ properties_different = []
+
+ if tool == "llama-bench":
+ # For llama-bench, skip n_prompt, n_gen, n_depth from differentiation logic
+ check_properties = [kp for kp in key_properties if kp not in ["n_prompt", "n_gen", "n_depth"]]
+ for i, kp_i in enumerate(key_properties):
+ if kp_i in default_show or kp_i in ["n_prompt", "n_gen", "n_depth"]:
+ continue
+ for row_full in rows_full:
+ if row_full[i] != rows_full[0][i]:
+ properties_different.append(kp_i)
+ break
+ elif tool == "test-backend-ops":
+ # For test-backend-ops, check all key properties
+ for i, kp_i in enumerate(key_properties):
+ if kp_i in default_show:
+ continue
+ for row_full in rows_full:
+ if row_full[i] != rows_full[0][i]:
+ properties_different.append(kp_i)
+ break
+ else:
+ assert False
+
+ show = []
+
+ if tool == "llama-bench":
+ # Show CPU and/or GPU by default even if the hardware for all results is the same:
+ if rows_full and "n_gpu_layers" not in properties_different:
+ ngl = int(rows_full[0][key_properties.index("n_gpu_layers")])
+
+ if ngl != 99 and "cpu_info" not in properties_different:
+ show.append("cpu_info")
+
+ show += properties_different
+
+ index_default = 0
+ for prop in ["cpu_info", "gpu_info", "n_gpu_layers", "main_gpu"]:
+ if prop in show:
+ index_default += 1
+ show = show[:index_default] + default_show + show[index_default:]
+ elif tool == "test-backend-ops":
+ show = default_show + properties_different
+ else:
+ assert False
+
+ for prop in default_hide:
+ try:
+ show.remove(prop)
+ except ValueError:
+ pass
+
+ # Add plot_x parameter to parameters to show if it's not already present:
+ if known_args.plot:
+ for k, v in pretty_names.items():
+ if v == known_args.plot_x and k not in show:
+ show.append(k)
+ break
+
+ rows_show = bench_data.get_rows(show, hexsha8_baseline, hexsha8_compare)
+
+if not rows_show:
+ logger.error(f"No comparable data was found between {name_baseline} and {name_compare}.\n")
+ sys.exit(1)
+
+table = []
+primary_metric = "FLOPS" # Default to FLOPS for test-backend-ops
+
+if tool == "llama-bench":
+ # For llama-bench, create test names and compare avg_ts values
+ for row in rows_show:
+ n_prompt = int(row[-5])
+ n_gen = int(row[-4])
+ n_depth = int(row[-3])
+ if n_prompt != 0 and n_gen == 0:
+ test_name = f"pp{n_prompt}"
+ elif n_prompt == 0 and n_gen != 0:
+ test_name = f"tg{n_gen}"
+ else:
+ test_name = f"pp{n_prompt}+tg{n_gen}"
+ if n_depth != 0:
+ test_name = f"{test_name}@d{n_depth}"
+ # Regular columns test name avg t/s values Speedup
+ # VVVVVVVVVVVVV VVVVVVVVV VVVVVVVVVVVVVV VVVVVVV
+ table.append(list(row[:-5]) + [test_name] + list(row[-2:]) + [float(row[-1]) / float(row[-2])])
+elif tool == "test-backend-ops":
+ # Determine the primary metric by checking rows until we find one with valid data
+ if rows_show:
+ primary_metric = "FLOPS" # Default to FLOPS
+ flops_values = []
+
+ # Collect all FLOPS values to determine the best unit
+ for sample_row in rows_show:
+ baseline_flops = float(sample_row[-4])
+ compare_flops = float(sample_row[-3])
+ baseline_bandwidth = float(sample_row[-2])
+
+ if baseline_flops > 0:
+ flops_values.extend([baseline_flops, compare_flops])
+ elif baseline_bandwidth > 0 and not flops_values:
+ primary_metric = "Bandwidth (GB/s)"
+
+ # If we have FLOPS data, determine the appropriate unit
+ if flops_values:
+ primary_metric = get_flops_unit_name(flops_values)
+
+ # For test-backend-ops, prioritize FLOPS > bandwidth for comparison
+ for row in rows_show:
+ # Extract metrics: flops, bandwidth_gb_s (baseline and compare)
+ baseline_flops = float(row[-4])
+ compare_flops = float(row[-3])
+ baseline_bandwidth = float(row[-2])
+ compare_bandwidth = float(row[-1])
+
+ # Determine which metric to use for comparison (prioritize FLOPS > bandwidth)
+ if baseline_flops > 0 and compare_flops > 0:
+ # Use FLOPS comparison (higher is better)
+ speedup = compare_flops / baseline_flops
+ baseline_str = format_flops_for_table(baseline_flops, primary_metric)
+ compare_str = format_flops_for_table(compare_flops, primary_metric)
+ elif baseline_bandwidth > 0 and compare_bandwidth > 0:
+ # Use bandwidth comparison (higher is better)
+ speedup = compare_bandwidth / baseline_bandwidth
+ baseline_str = f"{baseline_bandwidth:.2f}"
+ compare_str = f"{compare_bandwidth:.2f}"
+ else:
+ # Fallback if no valid data is available
+ baseline_str = "N/A"
+ compare_str = "N/A"
+ from math import nan
+ speedup = nan
+
+ table.append(list(row[:-4]) + [baseline_str, compare_str, speedup])
+else:
+ assert False
+
+# Some a-posteriori fixes to make the table contents prettier:
+for bool_property in bool_properties:
+ if bool_property in show:
+ ip = show.index(bool_property)
+ for row_table in table:
+ row_table[ip] = "Yes" if int(row_table[ip]) == 1 else "No"
+
+if tool == "llama-bench":
+ if "model_type" in show:
+ ip = show.index("model_type")
+ for (old, new) in MODEL_SUFFIX_REPLACE.items():
+ for row_table in table:
+ row_table[ip] = row_table[ip].replace(old, new)
+
+ if "model_size" in show:
+ ip = show.index("model_size")
+ for row_table in table:
+ row_table[ip] = float(row_table[ip]) / 1024 ** 3
+
+ if "gpu_info" in show:
+ ip = show.index("gpu_info")
+ for row_table in table:
+ for gns in GPU_NAME_STRIP:
+ row_table[ip] = row_table[ip].replace(gns, "")
+
+ gpu_names = row_table[ip].split(", ")
+ num_gpus = len(gpu_names)
+ all_names_the_same = len(set(gpu_names)) == 1
+ if len(gpu_names) >= 2 and all_names_the_same:
+ row_table[ip] = f"{num_gpus}x {gpu_names[0]}"
+
+headers = [pretty_names.get(p, p) for p in show]
+if tool == "llama-bench":
+ headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
+elif tool == "test-backend-ops":
+ headers += [f"{primary_metric} {name_baseline}", f"{primary_metric} {name_compare}", "Speedup"]
+else:
+ assert False
+
+if known_args.plot:
+ def create_performance_plot(table_data: list[list[str]], headers: list[str], baseline_name: str, compare_name: str, output_file: str, plot_x_param: str, log_scale: bool = False, tool_type: str = "llama-bench", metric_name: str = "t/s"):
+ try:
+ import matplotlib
+ import matplotlib.pyplot as plt
+ matplotlib.use('Agg')
+ except ImportError as e:
+ logger.error("matplotlib is required for --plot.")
+ raise e
+
+ data_headers = headers[:-4] # Exclude the last 4 columns (Test, baseline t/s, compare t/s, Speedup)
+ plot_x_index = None
+ plot_x_label = plot_x_param
+
+ if plot_x_param not in ["n_prompt", "n_gen", "n_depth"]:
+ pretty_name = LLAMA_BENCH_PRETTY_NAMES.get(plot_x_param, plot_x_param)
+ if pretty_name in data_headers:
+ plot_x_index = data_headers.index(pretty_name)
+ plot_x_label = pretty_name
+ elif plot_x_param in data_headers:
+ plot_x_index = data_headers.index(plot_x_param)
+ plot_x_label = plot_x_param
+ else:
+ logger.error(f"Parameter '{plot_x_param}' not found in current table columns. Available columns: {', '.join(data_headers)}")
+ return
+
+ grouped_data = {}
+
+ for i, row in enumerate(table_data):
+ group_key_parts = []
+ test_name = row[-4]
+
+ base_test = ""
+ x_value = None
+
+ if plot_x_param in ["n_prompt", "n_gen", "n_depth"]:
+ for j, val in enumerate(row[:-4]):
+ header_name = data_headers[j]
+ if val is not None and str(val).strip():
+ group_key_parts.append(f"{header_name}={val}")
+
+ if plot_x_param == "n_prompt" and "pp" in test_name:
+ base_test = test_name.split("@")[0]
+ x_value = base_test
+ elif plot_x_param == "n_gen" and "tg" in test_name:
+ x_value = test_name.split("@")[0]
+ elif plot_x_param == "n_depth" and "@d" in test_name:
+ base_test = test_name.split("@d")[0]
+ x_value = int(test_name.split("@d")[1])
+ else:
+ base_test = test_name
+
+ if base_test.strip():
+ group_key_parts.append(f"Test={base_test}")
+ else:
+ for j, val in enumerate(row[:-4]):
+ if j != plot_x_index:
+ header_name = data_headers[j]
+ if val is not None and str(val).strip():
+ group_key_parts.append(f"{header_name}={val}")
+ else:
+ x_value = val
+
+ group_key_parts.append(f"Test={test_name}")
+
+ group_key = tuple(group_key_parts)
+
+ if group_key not in grouped_data:
+ grouped_data[group_key] = []
+
+ grouped_data[group_key].append({
+ 'x_value': x_value,
+ 'baseline': float(row[-3]),
+ 'compare': float(row[-2]),
+ 'speedup': float(row[-1])
+ })
+
+ if not grouped_data:
+ logger.error("No data available for plotting")
+ return
+
+ def make_axes(num_groups, max_cols=2, base_size=(8, 4)):
+ from math import ceil
+ cols = 1 if num_groups == 1 else min(max_cols, num_groups)
+ rows = ceil(num_groups / cols)
+
+ # Scale figure size by grid dimensions
+ w, h = base_size
+ fig, ax_arr = plt.subplots(rows, cols,
+ figsize=(w * cols, h * rows),
+ squeeze=False)
+
+ axes = ax_arr.flatten()[:num_groups]
+ return fig, axes
+
+ num_groups = len(grouped_data)
+ fig, axes = make_axes(num_groups)
+
+ plot_idx = 0
+
+ for group_key, points in grouped_data.items():
+ if plot_idx >= len(axes):
+ break
+ ax = axes[plot_idx]
+
+ try:
+ points_sorted = sorted(points, key=lambda p: float(p['x_value']) if p['x_value'] is not None else 0)
+ x_values = [float(p['x_value']) if p['x_value'] is not None else 0 for p in points_sorted]
+ except ValueError:
+ points_sorted = sorted(points, key=lambda p: group_key)
+ x_values = [p['x_value'] for p in points_sorted]
+
+ baseline_vals = [p['baseline'] for p in points_sorted]
+ compare_vals = [p['compare'] for p in points_sorted]
+
+ ax.plot(x_values, baseline_vals, 'o-', color='skyblue',
+ label=f'{baseline_name}', linewidth=2, markersize=6)
+ ax.plot(x_values, compare_vals, 's--', color='lightcoral', alpha=0.8,
+ label=f'{compare_name}', linewidth=2, markersize=6)
+
+ if log_scale:
+ ax.set_xscale('log', base=2)
+ unique_x = sorted(set(x_values))
+ ax.set_xticks(unique_x)
+ ax.set_xticklabels([str(int(x)) for x in unique_x])
+
+ title_parts = []
+ for part in group_key:
+ if '=' in part:
+ key, value = part.split('=', 1)
+ title_parts.append(f"{key}: {value}")
+
+ title = ', '.join(title_parts) if title_parts else "Performance comparison"
+
+ # Determine y-axis label based on tool type
+ if tool_type == "llama-bench":
+ y_label = "Tokens per second (t/s)"
+ elif tool_type == "test-backend-ops":
+ y_label = metric_name
+ else:
+ assert False
+
+ ax.set_xlabel(plot_x_label, fontsize=12, fontweight='bold')
+ ax.set_ylabel(y_label, fontsize=12, fontweight='bold')
+ ax.set_title(title, fontsize=12, fontweight='bold')
+ ax.legend(loc='best', fontsize=10)
+ ax.grid(True, alpha=0.3)
+
+ plot_idx += 1
+
+ for i in range(plot_idx, len(axes)):
+ axes[i].set_visible(False)
+
+ fig.suptitle(f'Performance comparison: {compare_name} vs. {baseline_name}',
+ fontsize=14, fontweight='bold')
+ fig.subplots_adjust(top=1)
+
+ plt.tight_layout()
+ plt.savefig(output_file, dpi=300, bbox_inches='tight')
+ plt.close()
+
+ create_performance_plot(table, headers, name_baseline, name_compare, known_args.plot, known_args.plot_x, known_args.plot_log_scale, tool, primary_metric)
+
+print(tabulate( # noqa: NP100
+ table,
+ headers=headers,
+ floatfmt=".2f",
+ tablefmt=known_args.output
+))
diff --git a/llama.cpp/scripts/compare-logprobs.py b/llama.cpp/scripts/compare-logprobs.py
new file mode 100644
index 0000000..63861dd
--- /dev/null
+++ b/llama.cpp/scripts/compare-logprobs.py
@@ -0,0 +1,281 @@
+import argparse
+import requests
+import json
+from pathlib import Path
+import logging
+
+logger = logging.getLogger("compare-logprobs")
+logging.basicConfig(level=logging.INFO)
+
+
+DESCRIPTION = """
+Compare logits between llama.cpp and another inference engine using OpenAI-compatible server endpoints.
+
+Unlike compare-logits.py, it allows dumping logits from a hosted API endpoint. Useful when it's not possible to run both models locally.
+
+Example usage:
+ Step 1: Dump logits from two different servers
+ python scripts/compare-logprobs.py dump logits_llama.log http://localhost:8080/v1/completions
+ python scripts/compare-logprobs.py dump logits_other.log http://other-engine:8000/v1/completions
+
+ (optionally, you can add --api-key <key> if the endpoint requires authentication)
+
+ Step 2: Compare the dumped logits
+ python scripts/compare-logprobs.py compare logits_llama.log logits_other.log report.md
+"""
+
+
+def generate_input_prompt(length: int) -> list[str]:
+ CORPUS = """
+ You are an advanced AI assistant capable of using tools to gather information, perform calculations, or execute tasks. Always think step by step before responding. If a user's query requires external data, computation, or actions beyond your internal knowledge, use the appropriate tools via function calls.
+
+ ### Tool Call Format:
+ When you need to use a tool, output the call in this exact XML format. Include the opening and closing tags. Do not escape arguments; they will be parsed as plain text.
+
+ You can make multiple calls in one go by placing them one after another.
+ """
+ words = [w.strip() for w in CORPUS.strip().split(" ")]
+ words = [w for w in words if len(w) > 0] # filter out empty strings
+ while len(words) < length:
+ words += words
+ return words[:length]
+
+
+def dump_logits(
+ endpoint: str,
+ output_path: Path,
+ input_words: list[str],
+ pattern: list[tuple[bool, int]],
+ api_key=None,
+):
+ logger.info(f"Dumping logits to {output_path} from endpoint {endpoint}...")
+ words = input_words
+ curr_text = ""
+ n_total = sum(n for get, n in pattern if get)
+ n_done = 0
+ i_cur = 0
+ i_total = len(words)
+ with output_path.open("w") as f:
+ for get, n in pattern:
+ if not get:
+ # skip n words
+ for i in range(n):
+ curr_text += words.pop(0) + " "
+ i_cur += 1
+ continue
+ # get n words
+ for i in range(n):
+ curr_text += words.pop(0) + " "
+ payload = {
+ "prompt": curr_text.strip(),
+ "temperature": 0.0,
+ "top_k": 1,
+ "max_tokens": 1,
+ "logprobs": 1,
+ "stream": False,
+ }
+ response = requests.post(
+ endpoint,
+ json=payload,
+ headers={"Authorization": f"Bearer {api_key}"} if api_key else {},
+ )
+ response.raise_for_status()
+ data = response.json()
+ data["__index"] = i_cur # add index for easier debugging later
+ data = json.dumps(data)
+ f.write(f"{data}\n")
+ n_done += 1
+ i_cur += 1
+ logger.info(
+ f"\n\n{data}\n\n[Step: {n_done}/{n_total} | Word: {i_cur}/{i_total}]"
+ )
+ logger.info(f"Logits dumped to {output_path}")
+
+
+def get_token_logprobs(data: dict):
+ logprobs = data["choices"][0]["logprobs"]
+ if "content" in logprobs:
+ # llama.cpp case
+ top = logprobs["content"][0]["top_logprobs"][0]
+ return top["token"], top["logprob"]
+ else:
+ # vllm case
+ tokens = logprobs["tokens"]
+ token_logprobs = logprobs["token_logprobs"]
+ return tokens[0], token_logprobs[0]
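+# Sketch of the two response shapes handled above (field values illustrative):
+#   llama.cpp: {"choices": [{"logprobs": {"content": [{"top_logprobs": [{"token": "x", "logprob": -0.1}]}]}}]}
+#   vllm:      {"choices": [{"logprobs": {"tokens": ["x"], "token_logprobs": [-0.1]}}]}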
+
+
+def clean_text(text: str) -> str:
+ return (
+ "'"
+ + text.replace("\n", "\\n")
+ .replace("\t", "\\t")
+ .replace("\r", "\\r")
+ .replace("|", "\\|")
+ + "'"
+ )
+
+
+def compare_logits(input1: Path, input2: Path, output_path: Path):
+ with input1.open("r") as f1, input2.open("r") as f2, output_path.open("w") as fout:
+ lines1 = f1.readlines()
+ lines2 = f2.readlines()
+
+ tab_header = [
+ "idx",
+ input1.name,
+ "logprob_1",
+ input2.name,
+ "logprob_2",
+ "diff (abs)",
+ ]
+ tab_entries = []
+ tab_max_widths = [len(h) for h in tab_header]
+
+ assert len(lines1) == len(
+ lines2
+ ), "Input files must have the same number of lines."
+
+ fout.write("# Logits Comparison Report\n\n")
+ for i, (line1, line2) in enumerate(zip(lines1, lines2)):
+ if not line1.strip() or not line2.strip():
+ continue # skip empty lines
+
+ data1 = json.loads(line1)
+ data2 = json.loads(line2)
+
+ idx1 = data1.get("__index", -1)
+ idx2 = data2.get("__index", -1)
+ if idx1 != idx2:
+ logger.warning(
+ f"Warning: Mismatched indices at line {i}: {idx1} vs {idx2}"
+ )
+
+ token1, logprob1 = get_token_logprobs(data1)
+ token2, logprob2 = get_token_logprobs(data2)
+
+ token1 = clean_text(token1)
+ token2 = clean_text(token2)
+ abs_diff = abs(logprob1 - logprob2)
+
+ tab_entries.append(
+ (
+ str(idx1 + 1),
+ token1,
+ f"{logprob1:.4f}",
+ token2,
+ f"{logprob2:.4f}",
+ f"{(abs_diff):.4f}",
+ )
+ )
+
+ for i in range(len(tab_entries)):
+ for j in range(len(tab_header)):
+ tab_max_widths[j] = max(tab_max_widths[j], len(tab_entries[i][j]))
+
+ output = ""
+ for j in range(len(tab_header)):
+ output += f"| {tab_header[j]:<{tab_max_widths[j]}} "
+ output += "|\n"
+ for j in range(len(tab_header)):
+ output += f"|{'-' * (tab_max_widths[j] + 2)}"
+ output += "|\n"
+ for entry in tab_entries:
+ for j in range(len(tab_header)):
+ output += f"| {entry[j]:<{tab_max_widths[j]}} "
+ output += "|\n"
+
+ logger.info("\n" + output)
+ fout.write(output)
+ logger.info(f"Report written to {output_path}")
+
+
+def parse_pattern(pattern: str) -> list[tuple[bool, int]]:
+ parts = pattern.split(",")
+ result = []
+ for i, part in enumerate(parts):
+ n = int(part)
+ if i % 2 == 0:
+ result.append((True, n)) # get n words
+ else:
+ result.append((False, n)) # skip n words
+ return result
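+# For instance, parse_pattern("10,1000,10") yields [(True, 10), (False, 1000), (True, 10)]:
+# dump logprobs for 10 words, skip the next 1000, then dump another 10.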
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(
+ description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter
+ )
+ subparsers = parser.add_subparsers(
+ dest="verb", required=True, help="action to perform"
+ )
+
+ # dump subcommand
+ parser_dump = subparsers.add_parser("dump", help="dump logits from an endpoint")
+ parser_dump.add_argument(
+ "output", type=Path, help="output path for dumped logits (.log)"
+ )
+ parser_dump.add_argument(
+ "endpoint", type=str, help="OAI-compat /completions endpoint"
+ )
+ parser_dump.add_argument(
+ "--api-key",
+ type=str,
+ default=None,
+ help="API key for authentication (if required)",
+ )
+ parser_dump.add_argument(
+ "--file",
+ type=Path,
+ default=None,
+ help="File containing prompt to use instead of the default",
+ )
+ parser_dump.add_argument(
+ "--pattern",
+ type=str,
+ default="10,1000,10,4000,10",
+ help="Pattern n_get,n_skip,... where n_get is number of words to get and n_skip is number of words to skip (num of words, NOT num of tokens)",
+ )
+
+ # compare subcommand
+ parser_compare = subparsers.add_parser(
+ "compare", help="compare two dumped logits files"
+ )
+ parser_compare.add_argument("input1", type=Path, help="first input file (.log)")
+ parser_compare.add_argument("input2", type=Path, help="second input file (.log)")
+ parser_compare.add_argument(
+ "output", type=Path, help="output path for comparison report (.md)"
+ )
+
+ try:
+ return parser.parse_args()
+ except Exception as e:
+ parser.print_help()
+ raise e
+
+
+def main():
+ args = parse_args()
+
+ if args.verb == "dump":
+ pattern = parse_pattern(args.pattern)
+ input_length = sum(n for _, n in pattern)
+ input_words = generate_input_prompt(input_length)
+        if args.file is not None:
+            with args.file.open("r") as f:
+                input_words = f.read().strip().split(" ")
+            input_length = len(input_words)
+            if input_length < sum(n for _, n in pattern):
+                raise ValueError(
+                    f"Input file has only {input_length} words, but the pattern requires at least {sum(n for _, n in pattern)} words."
+                )
+ logger.info(f"Using {input_length} words")
+ dump_logits(args.endpoint, args.output, input_words, pattern, args.api_key)
+ elif args.verb == "compare":
+ compare_logits(args.input1, args.input2, args.output)
+ else:
+ raise ValueError(f"Unknown verb: {args.verb}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/llama.cpp/scripts/create_ops_docs.py b/llama.cpp/scripts/create_ops_docs.py
new file mode 100755
index 0000000..e3a476a
--- /dev/null
+++ b/llama.cpp/scripts/create_ops_docs.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+"""
+This script parses docs/ops/*.csv and creates the ops.md, which is a table documenting supported operations on various ggml backends.
+"""
+import csv
+import logging
+import sys
+from pathlib import Path
+from collections import defaultdict
+
+
+class DocsGenerator:
+ def __init__(self, ggml_root: str, output_filename: str = "ops.md"):
+ self.ggml_root = Path(ggml_root)
+ self.ops_dir = self.ggml_root / "docs" / "ops"
+ self.output_filename = output_filename
+ self.backend_support: dict[str, dict[str, list[bool]]] = defaultdict(
+ lambda: defaultdict(list)
+ )
+ self.all_operations: set[str] = set()
+ self.all_backends: set[str] = set()
+ self.logger = logging.getLogger(__name__)
+
+ def parse_support_files(self) -> None:
+ if not self.ops_dir.exists():
+ self.logger.warning(f"ops directory not found: {self.ops_dir}")
+ return
+
+ self.logger.info(f"Parsing support files from {self.ops_dir}...")
+
+ for support_file in self.ops_dir.glob("*.csv"):
+ self.logger.info(f" Reading: {support_file.name}")
+ self._parse_support_file(support_file)
+
+ def _parse_support_file(self, file_path: Path) -> None:
+ try:
+ with open(file_path, "r", newline='') as f:
+ reader = csv.DictReader(f)
+
+ for row in reader:
+ # Skip rows that don't have support mode
+ if row.get('test_mode') != 'support':
+ continue
+
+ backend_name = row.get('backend_name', '').strip()
+ operation = row.get('op_name', '').strip()
+ supported_str = row.get('error_message', '').strip() # "yes" or "no"
+ backend_reg_name = row.get('backend_reg_name', '').strip()
+
+ # Skip invalid or error operations
+ if not operation or not backend_name or operation in [
+ "CONTEXT_ERROR",
+ "BUILD_ERROR",
+ ]:
+ continue
+
+ is_supported = supported_str.lower() == "yes"
+
+ # Use backend_reg_name for grouping, fallback to backend_name
+ backend_key = backend_reg_name if backend_reg_name else backend_name
+
+ self.all_backends.add(backend_key)
+ self.backend_support[backend_key][operation].append(is_supported)
+ self.all_operations.add(operation)
+
+ except Exception as e:
+ self.logger.error(f" Error parsing {file_path}: {e}")
+
+ def get_backend_support_status(self, backend: str, operation: str) -> str:
+ support_list = self.backend_support[backend].get(operation, [])
+
+ if not support_list:
+ return "unsupported"
+
+ all_supported = all(support_list)
+ any_supported = any(support_list)
+
+ if all_supported:
+ return "supported"
+ elif any_supported:
+ return "partially supported"
+ else:
+ return "unsupported"
+
+ def get_support_status(self, operation: str) -> str:
+ if operation not in self.all_operations:
+ return "unsupported"
+
+ support_count = 0
+ total_backends = len(self.all_backends)
+
+ for backend in self.all_backends:
+            if any(self.backend_support[backend].get(operation, [])):
+ support_count += 1
+
+ if support_count == 0:
+ return "unsupported"
+ elif support_count == total_backends:
+ return "supported"
+ else:
+ return "partially supported"
+
+ def get_support_symbol(self, status: str) -> str:
+ symbols = {"supported": "✅", "partially supported": "🟡", "unsupported": "❌"}
+ return symbols.get(status, "❓")
+
+ def generate_markdown(self) -> str:
+ lines = []
+
+ lines.append("# GGML Operations")
+ lines.append("")
+ lines.append("List of GGML operations and backend support status.")
+ lines.append("")
+ lines.append("## How to add a backend to this table:")
+ lines.append("")
+ lines.append("1. Run `test-backend-ops support --output csv` with your backend name and redirect output to a csv file in `docs/ops/` (e.g., `docs/ops/CUDA.csv`)")
+ lines.append("2. Regenerate `/docs/ops.md` via `./scripts/create_ops_docs.py`")
+ lines.append("")
+ lines.append("Legend:")
+ lines.append("- ✅ Fully supported by this backend")
+ lines.append("- 🟡 Partially supported by this backend")
+ lines.append("- ❌ Not supported by this backend")
+ lines.append("")
+
+ backends = sorted(self.all_backends)
+ header = "| Operation |"
+ for backend in backends:
+ header += f" {backend} |"
+
+ separator = "|-----------|"
+ for _ in backends:
+ separator += "------|"
+
+ lines.append(header)
+ lines.append(separator)
+
+ sorted_operations = sorted(self.all_operations)
+
+ for operation in sorted_operations:
+ row = f"| {operation:>32} |"
+
+            for backend in backends:
+                status = self.get_backend_support_status(backend, operation)
+                row += f" {self.get_support_symbol(status)} |"
+
+ lines.append(row)
+
+ lines.append("")
+
+ return "\n".join(lines)
+
+ def run(self) -> None:
+ self.logger.info("Parsing GGML operation support files...")
+ self.parse_support_files()
+
+ if not self.all_operations:
+ self.logger.error(
+ "No operations found. Make sure to run test-backend-ops support --output csv > docs/ops/file.csv first."
+ )
+ return
+
+ self.logger.info(
+ f"Found {len(self.all_operations)} operations across {len(self.all_backends)} backends"
+ )
+
+ self.logger.info("Generating markdown...")
+ markdown_content = self.generate_markdown()
+
+ docs_dir = self.ggml_root / "docs"
+ docs_dir.mkdir(exist_ok=True)
+
+ ops_file = docs_dir / self.output_filename
+ with open(ops_file, "w") as f:
+ f.write(markdown_content)
+
+ self.logger.info(f"Generated: {ops_file}")
+ self.logger.info(f"Operations: {len(self.all_operations)}")
+ self.logger.info(f"Backends: {len(self.all_backends)}")
+
+
+def main():
+ logging.basicConfig(level=logging.INFO)
+
+ if len(sys.argv) > 1:
+ output_filename = sys.argv[1]
+ else:
+ output_filename = "ops.md"
+
+ generator = DocsGenerator(".", output_filename)
+ generator.run()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/llama.cpp/scripts/debug-test.sh b/llama.cpp/scripts/debug-test.sh
new file mode 100755
index 0000000..ead7ea1
--- /dev/null
+++ b/llama.cpp/scripts/debug-test.sh
@@ -0,0 +1,202 @@
+#!/usr/bin/env bash
+
+PROG=${0##*/}
+build_dir="build-ci-debug"
+
+# Print Color Commands
+red=$(tput setaf 1)
+green=$(tput setaf 2)
+yellow=$(tput setaf 3)
+blue=$(tput setaf 4)
+magenta=$(tput setaf 5)
+cyan=$(tput setaf 6)
+normal=$(tput sgr0)
+
+
+# Print Help Message
+####################
+
+print_full_help() {
+ cat << EOF
+Usage: $PROG [OPTION]... <test_regex> (test_number)
+Debug specific ctest program.
+
+Options:
+ -h, --help display this help and exit
+ -g run in gdb mode
+
+Arguments:
+ <test_regex> (Mandatory) Supply one regex to the script to filter tests
+ (test_number) (Optional) Test number to run a specific test
+
+Example:
+ $PROG test-tokenizer
+ $PROG test-tokenizer 3
+EOF
+}
+
+abort() {
+ echo "Error: $1" >&2
+ cat << EOF >&2
+Usage: $PROG [OPTION]... <test_regex> (test_number)
+Debug specific ctest program.
+Refer to --help for full instructions.
+EOF
+ exit 1
+}
+
+
+# Dependency Sanity Check
+#########################
+
+check_dependency() {
+ command -v "$1" >/dev/null 2>&1 || {
+ abort "$1 is required but not found. Please install it and try again."
+ }
+}
+
+check_dependency ctest
+check_dependency cmake
+
+
+# Step 0: Check the args
+########################
+
+if [ x"$1" = x"-h" ] || [ x"$1" = x"--help" ]; then
+ print_full_help >&2
+ exit 0
+fi
+
+# Parse command-line options
+gdb_mode=false
+while getopts "g" opt; do
+ case $opt in
+ g)
+ gdb_mode=true
+ echo "gdb_mode Mode Enabled"
+ ;;
+ esac
+done
+
+# Shift the option parameters
+shift $((OPTIND - 1))
+
+# Positional Argument Processing : <test_regex>
+if [ -z "${1}" ]; then
+ abort "Test regex is required"
+else
+ test_suite=${1:-}
+fi
+
+# Positional Argument Processing : (test_number)
+test_number=${2:-}
+
+
+# Step 1: Reset and Setup folder context
+########################################
+
+## Sanity check that we are actually in a git repo
+repo_root=$(git rev-parse --show-toplevel)
+if [ ! -d "$repo_root" ]; then
+ abort "Not in a Git repository."
+fi
+
+## Reset folder to root context of git repo and Create and enter build directory
+pushd "$repo_root"
+rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir"
+
+
+# Step 2: Setup Build Environment and Compile Test Binaries
+###########################################################
+
+cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 || abort "Failed to build environment"
+pushd "$build_dir"
+make -j || abort "Failed to compile"
+popd > /dev/null || exit 1
+
+
+# Step 3: Find all available tests that match the REGEX
+####################################################
+
+# Ctest Gather Tests
+# `-R test-tokenizer` : looks for all the test files named `test-tokenizer*` (R=Regex)
+# `-N` : "show-only" disables test execution & shows test commands that you can feed to GDB.
+# `-V` : Verbose Mode
+printf "\n\nGathering tests that fit REGEX: ${test_suite} ...\n"
+pushd "$build_dir"
+tests=($(ctest -R ${test_suite} -V -N | grep -E " +Test +#[0-9]+" | cut -d':' -f2 | awk '{$1=$1};1'))
+if [ ${#tests[@]} -eq 0 ]; then
+ abort "No tests available... check your compilation process..."
+fi
+popd > /dev/null || exit 1
+
+
+# Step 4: Identify Test Command for Debugging
+#############################################
+
+# Select test number
+if [ -z "$test_number" ]; then
+ # List out available tests
+ printf "Which test would you like to debug?\n"
+ id=0
+ for s in "${tests[@]}"
+ do
+ echo "Test# ${id}"
+ echo " $s"
+ ((id++))
+ done
+
+ # Prompt user which test they wanted to run
+ printf "\nRun test#? "
+ read test_number
+
+else
+ printf "\nUser Already Requested #${test_number}\n"
+
+fi
+
+# Grab all tests commands
+pushd "$build_dir"
+sIFS=$IFS # Save Initial IFS (Internal Field Separator)
+IFS=$'\n' # Change IFS (Internal Field Separator) (So we split ctest output by newline rather than by spaces)
+test_args=($(ctest -R ${test_suite} -V -N | grep "Test command" | cut -d':' -f3 | awk '{$1=$1};1' )) # Get test args
+IFS=$sIFS # Reset IFS (Internal Field Separator)
+popd > /dev/null || exit 1
+
+# Grab specific test command
+single_test_name="${tests[test_number]}"
+single_test_command="${test_args[test_number]}"
+
+
+# Step 5: Execute or GDB Debug
+##############################
+
+printf "${magenta}Running Test #${test_number}: ${single_test_name}${normal}\n"
+printf "${cyan}single_test_command: ${single_test_command}${normal}\n"
+
+if [ "$gdb_mode" = "true" ]; then
+ # Execute debugger
+ pushd "$repo_root" || exit 1
+ eval "gdb --args ${single_test_command}"
+ popd > /dev/null || exit 1
+
+else
+ # Execute Test
+ pushd "$repo_root" || exit 1
+ eval "${single_test_command}"
+ exit_code=$?
+ popd > /dev/null || exit 1
+
+ # Print Result
+ printf "${blue}Ran Test #${test_number}: ${single_test_name}${normal}\n"
+ printf "${yellow}Command: ${single_test_command}${normal}\n"
+ if [ $exit_code -eq 0 ]; then
+ printf "${green}TEST PASS${normal}\n"
+ else
+ printf "${red}TEST FAIL${normal}\n"
+ fi
+
+fi
+
+# Return to the directory from which the user ran the command.
+popd > /dev/null || exit 1
diff --git a/llama.cpp/scripts/fetch_server_test_models.py b/llama.cpp/scripts/fetch_server_test_models.py
new file mode 100755
index 0000000..ac483ef
--- /dev/null
+++ b/llama.cpp/scripts/fetch_server_test_models.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+'''
+ This script fetches all the models used in the server tests.
+
+ This is useful for slow tests that use larger models, to avoid them timing out on the model downloads.
+
+ It is meant to be run from the root of the repository.
+
+ Example:
+ python scripts/fetch_server_test_models.py
+ ( cd tools/server/tests && ./tests.sh -v -x -m slow )
+'''
+import ast
+import glob
+import logging
+import os
+from typing import Generator
+from pydantic import BaseModel
+from typing import Optional
+import subprocess
+
+
+class HuggingFaceModel(BaseModel):
+ hf_repo: str
+ hf_file: Optional[str] = None
+
+ class Config:
+ frozen = True
+
+
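+# Illustrative shape of the pytest decorators this script scans for (not taken from
+# an actual test file):
+#
+#   @pytest.mark.parametrize("hf_repo,hf_file", [
+#       ("ggml-org/models", "tinyllamas/stories260K.gguf"),
+#   ])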
+def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, None, None]:
+ try:
+ with open(test_file) as f:
+ tree = ast.parse(f.read())
+ except Exception as e:
+ logging.error(f'collect_hf_model_test_parameters failed on {test_file}: {e}')
+ return
+
+ for node in ast.walk(tree):
+ if isinstance(node, ast.FunctionDef):
+ for dec in node.decorator_list:
+ if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize':
+ param_names = ast.literal_eval(dec.args[0]).split(",")
+ if "hf_repo" not in param_names:
+ continue
+
+ raw_param_values = dec.args[1]
+ if not isinstance(raw_param_values, ast.List):
+ logging.warning(f'Skipping non-list parametrize entry at {test_file}:{node.lineno}')
+ continue
+
+ hf_repo_idx = param_names.index("hf_repo")
+ hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None
+
+ for t in raw_param_values.elts:
+ if not isinstance(t, ast.Tuple):
+ logging.warning(f'Skipping non-tuple parametrize entry at {test_file}:{node.lineno}')
+ continue
+ yield HuggingFaceModel(
+ hf_repo=ast.literal_eval(t.elts[hf_repo_idx]),
+ hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None)
+
+
+if __name__ == '__main__':
+ logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+
+ models = sorted(list(set([
+ model
+ for test_file in glob.glob('tools/server/tests/unit/test_*.py')
+ for model in collect_hf_model_test_parameters(test_file)
+    ])), key=lambda m: (m.hf_repo, m.hf_file or ''))
+
+ logging.info(f'Found {len(models)} models in parameterized tests:')
+ for m in models:
+ logging.info(f' - {m.hf_repo} / {m.hf_file}')
+
+ cli_path = os.environ.get(
+ 'LLAMA_CLI_BIN_PATH',
+ os.path.join(
+ os.path.dirname(__file__),
+ '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))
+
+ for m in models:
+ if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file):
+ continue
+ if m.hf_file is not None and '-of-' in m.hf_file:
+ logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file')
+ continue
+ logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched')
+ cmd = [
+ cli_path,
+ '-hfr', m.hf_repo,
+ *([] if m.hf_file is None else ['-hff', m.hf_file]),
+ '-n', '1',
+ '-p', 'Hey',
+ '--no-warmup',
+ '--log-disable',
+ '-no-cnv']
+ if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
+ cmd.append('-fa')
+ try:
+ subprocess.check_call(cmd)
+ except subprocess.CalledProcessError:
+ logging.error(f'Failed to fetch model at {m.hf_repo} / {m.hf_file} with command:\n {" ".join(cmd)}')
+ exit(1)
diff --git a/llama.cpp/scripts/gen-authors.sh b/llama.cpp/scripts/gen-authors.sh
new file mode 100755
index 0000000..73e7b38
--- /dev/null
+++ b/llama.cpp/scripts/gen-authors.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+printf "# date: $(date)\n" > AUTHORS
+printf "# this file is auto-generated by scripts/gen-authors.sh\n\n" >> AUTHORS
+
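+# one line per author, de-duplicated by the awk filter, then sorted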
+git log --format='%an <%ae>' --reverse --date=short master | awk '!seen[$0]++' | sort >> AUTHORS
+
+# if necessary, update your name here. for example: jdoe -> John Doe
+sed -i '' 's/^jdoe/John Doe/g' AUTHORS
diff --git a/llama.cpp/scripts/gen-unicode-data.py b/llama.cpp/scripts/gen-unicode-data.py
new file mode 100644
index 0000000..2d9bde0
--- /dev/null
+++ b/llama.cpp/scripts/gen-unicode-data.py
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+import array
+import unicodedata
+import requests
+
+
+MAX_CODEPOINTS = 0x110000
+
+UNICODE_DATA_URL = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
+
+
+# see https://www.unicode.org/L2/L1999/UnicodeData.html
+def unicode_data_iter():
+ res = requests.get(UNICODE_DATA_URL)
+ res.raise_for_status()
+ data = res.content.decode()
+
+ prev = []
+
+ for line in data.splitlines():
+        # e.g.: 0000;<control>;Cc;0;BN;;;;;N;NULL;;;;
+ line = line.split(";")
+
+ cpt = int(line[0], base=16)
+ assert cpt < MAX_CODEPOINTS
+
+ cpt_lower = int(line[-2] or "0", base=16)
+ assert cpt_lower < MAX_CODEPOINTS
+
+ cpt_upper = int(line[-3] or "0", base=16)
+ assert cpt_upper < MAX_CODEPOINTS
+
+ categ = line[2].strip()
+ assert len(categ) == 2
+
+ bidir = line[4].strip()
+        assert len(bidir) > 0
+
+ name = line[1]
+ if name.endswith(", First>"):
+ prev = (cpt, cpt_lower, cpt_upper, categ, bidir)
+ continue
+ if name.endswith(", Last>"):
+ assert prev[1:] == (0, 0, categ, bidir)
+ for c in range(prev[0], cpt):
+ yield (c, cpt_lower, cpt_upper, categ, bidir)
+
+ yield (cpt, cpt_lower, cpt_upper, categ, bidir)
+
+
+# see definition in unicode.h
+CODEPOINT_FLAG_UNDEFINED = 0x0001 #
+CODEPOINT_FLAG_NUMBER = 0x0002 # \p{N}
+CODEPOINT_FLAG_LETTER = 0x0004 # \p{L}
+CODEPOINT_FLAG_SEPARATOR = 0x0008 # \p{Z}
+CODEPOINT_FLAG_MARK = 0x0010 # \p{M}
+CODEPOINT_FLAG_PUNCTUATION = 0x0020 # \p{P}
+CODEPOINT_FLAG_SYMBOL = 0x0040 # \p{S}
+CODEPOINT_FLAG_CONTROL = 0x0080 # \p{C}
+
+UNICODE_CATEGORY_TO_FLAG = {
+ "Cn": CODEPOINT_FLAG_UNDEFINED, # Undefined
+ "Cc": CODEPOINT_FLAG_CONTROL, # Control
+ "Cf": CODEPOINT_FLAG_CONTROL, # Format
+ "Co": CODEPOINT_FLAG_CONTROL, # Private Use
+ "Cs": CODEPOINT_FLAG_CONTROL, # Surrrogate
+ "Ll": CODEPOINT_FLAG_LETTER, # Lowercase Letter
+ "Lm": CODEPOINT_FLAG_LETTER, # Modifier Letter
+ "Lo": CODEPOINT_FLAG_LETTER, # Other Letter
+ "Lt": CODEPOINT_FLAG_LETTER, # Titlecase Letter
+ "Lu": CODEPOINT_FLAG_LETTER, # Uppercase Letter
+ "L&": CODEPOINT_FLAG_LETTER, # Cased Letter
+ "Mc": CODEPOINT_FLAG_MARK, # Spacing Mark
+ "Me": CODEPOINT_FLAG_MARK, # Enclosing Mark
+ "Mn": CODEPOINT_FLAG_MARK, # Nonspacing Mark
+ "Nd": CODEPOINT_FLAG_NUMBER, # Decimal Number
+ "Nl": CODEPOINT_FLAG_NUMBER, # Letter Number
+ "No": CODEPOINT_FLAG_NUMBER, # Other Number
+ "Pc": CODEPOINT_FLAG_PUNCTUATION, # Connector Punctuation
+ "Pd": CODEPOINT_FLAG_PUNCTUATION, # Dash Punctuation
+ "Pe": CODEPOINT_FLAG_PUNCTUATION, # Close Punctuation
+ "Pf": CODEPOINT_FLAG_PUNCTUATION, # Final Punctuation
+ "Pi": CODEPOINT_FLAG_PUNCTUATION, # Initial Punctuation
+ "Po": CODEPOINT_FLAG_PUNCTUATION, # Other Punctuation
+ "Ps": CODEPOINT_FLAG_PUNCTUATION, # Open Punctuation
+ "Sc": CODEPOINT_FLAG_SYMBOL, # Currency Symbol
+ "Sk": CODEPOINT_FLAG_SYMBOL, # Modifier Symbol
+ "Sm": CODEPOINT_FLAG_SYMBOL, # Math Symbol
+ "So": CODEPOINT_FLAG_SYMBOL, # Other Symbol
+ "Zl": CODEPOINT_FLAG_SEPARATOR, # Line Separator
+ "Zp": CODEPOINT_FLAG_SEPARATOR, # Paragraph Separator
+ "Zs": CODEPOINT_FLAG_SEPARATOR, # Space Separator
+}
+
+
+codepoint_flags = array.array('H', [CODEPOINT_FLAG_UNDEFINED]) * MAX_CODEPOINTS
+table_whitespace = []
+table_lowercase = []
+table_uppercase = []
+table_nfd = []
+
+for (cpt, cpt_lower, cpt_upper, categ, bidir) in unicode_data_iter():
+ # convert codepoint to unicode character
+ char = chr(cpt)
+
+ # codepoint category flags
+ codepoint_flags[cpt] = UNICODE_CATEGORY_TO_FLAG[categ]
+
+ # lowercase conversion
+ if cpt_lower:
+ table_lowercase.append((cpt, cpt_lower))
+
+ # uppercase conversion
+ if cpt_upper:
+ table_uppercase.append((cpt, cpt_upper))
+
+ # NFD normalization
+ norm = ord(unicodedata.normalize('NFD', char)[0])
+ if cpt != norm:
+ table_nfd.append((cpt, norm))
+
+
+# whitespaces, see "<White_Space>" https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+table_whitespace.extend(range(0x0009, 0x000D + 1))
+table_whitespace.extend(range(0x2000, 0x200A + 1))
+table_whitespace.extend([0x0020, 0x0085, 0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000])
+
+
+# sort by codepoint
+table_whitespace.sort()
+table_lowercase.sort()
+table_uppercase.sort()
+table_nfd.sort()
+
+
+# group ranges with same flags
+ranges_flags: list[tuple[int, int]] = [(0, codepoint_flags[0])] # start, flags
+for codepoint, flags in enumerate(codepoint_flags):
+ if flags != ranges_flags[-1][1]:
+ ranges_flags.append((codepoint, flags))
+ranges_flags.append((MAX_CODEPOINTS, 0x0000))
+
+
+# group ranges with same nfd
+ranges_nfd: list[tuple[int, int, int]] = [(0, 0, 0)] # start, last, nfd
+for codepoint, norm in table_nfd:
+ start = ranges_nfd[-1][0]
+ if ranges_nfd[-1] != (start, codepoint - 1, norm):
+ ranges_nfd.append(None) # type: ignore[arg-type] # dummy, will be replaced below
+ start = codepoint
+ ranges_nfd[-1] = (start, codepoint, norm)
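+# e.g. (illustrative): codepoints 0x00C0..0x00C5 all have NFD base 0x0041 ('A'),
+# so they collapse into the single range entry (0x00C0, 0x00C5, 0x0041)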
+
+
+# Generate 'unicode-data.cpp':
+# python ./scripts/gen-unicode-data.py > unicode-data.cpp
+
+def out(line=""):
+ print(line, end='\n') # noqa
+
+
+out("""\
+// generated with scripts/gen-unicode-data.py
+
+#include "unicode-data.h"
+
+#include <cstdint>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+""")
+
+out("const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1")
+for codepoint, flags in ranges_flags:
+ out("{0x%06X, 0x%04X}," % (codepoint, flags))
+out("};\n")
+
+out("const std::unordered_set<uint32_t> unicode_set_whitespace = {")
+for codepoint in table_whitespace:
+ out("0x%06X," % codepoint)
+out("};\n")
+
+out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {")
+for tuple_lw in table_lowercase:
+ out("{0x%06X, 0x%06X}," % tuple_lw)
+out("};\n")
+
+out("const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {")
+for tuple_up in table_uppercase:
+ out("{0x%06X, 0x%06X}," % tuple_up)
+out("};\n")
+
+out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
+for triple in ranges_nfd:
+ out("{0x%06X, 0x%06X, 0x%06X}," % triple)
+out("};\n")
diff --git a/llama.cpp/scripts/get-flags.mk b/llama.cpp/scripts/get-flags.mk
new file mode 100644
index 0000000..a742766
--- /dev/null
+++ b/llama.cpp/scripts/get-flags.mk
@@ -0,0 +1,38 @@
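+# Sets GF_CFLAGS / GF_CXXFLAGS with compiler-specific warning flags based on GF_CC.
+# Illustrative usage from a Makefile:
+#   GF_CC := $(CC)
+#   include scripts/get-flags.mk
+#   CFLAGS   += $(GF_CFLAGS)
+#   CXXFLAGS += $(GF_CXXFLAGS)
+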
+ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
+ GF_CC_IS_GCC = 1
+ GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null; echo; $(GF_CC) -dumpversion; } | awk -F. '/./ { printf("%02d%02d%02d", $$1, $$2, $$3); exit }')
+else
+ GF_CC_IS_CLANG = 1
+ ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
+ GF_CC_IS_LLVM_CLANG = 1
+ else
+ GF_CC_IS_APPLE_CLANG = 1
+ endif
+ GF_CC_VER := \
+ $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+ | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+endif
+
+ifeq ($(GF_CC_IS_CLANG), 1)
+ # clang options
+ GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return
+ GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+ ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
+ GF_CFLAGS += -Wdouble-promotion
+ endif
+ ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
+ GF_CFLAGS += -Wdouble-promotion
+ endif
+else
+ # gcc options
+ GF_CFLAGS = -Wdouble-promotion
+ GF_CXXFLAGS = -Wno-array-bounds
+
+ ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
+ GF_CXXFLAGS += -Wno-format-truncation
+ endif
+ ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
+ GF_CXXFLAGS += -Wextra-semi
+ endif
+endif
diff --git a/llama.cpp/scripts/get-hellaswag.sh b/llama.cpp/scripts/get-hellaswag.sh
new file mode 100755
index 0000000..484e56f
--- /dev/null
+++ b/llama.cpp/scripts/get-hellaswag.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+wget https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/hellaswag_val_full.txt
+
+echo "Usage:"
+echo ""
+echo " ./llama-perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
+echo ""
+
+exit 0
diff --git a/llama.cpp/scripts/get-pg.sh b/llama.cpp/scripts/get-pg.sh
new file mode 100755
index 0000000..f180bf8
--- /dev/null
+++ b/llama.cpp/scripts/get-pg.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+function usage {
+ echo "usage: <n>$0"
+ echo "note: n is the number of essays to download"
+ echo "for specific n, the resulting pg.txt file will have the following number of tokens:"
+ echo "n | tokens"
+ echo "--- | ---"
+ echo "1 | 6230"
+ echo "2 | 23619"
+ echo "5 | 25859"
+ echo "10 | 36888"
+ echo "15 | 50188"
+ echo "20 | 59094"
+ echo "25 | 88764"
+ echo "30 | 103121"
+ echo "32 | 108338"
+ echo "35 | 113403"
+ echo "40 | 127699"
+ echo "45 | 135896"
+ exit 1
+}
+
+function has_cmd {
+ if ! [ -x "$(command -v $1)" ]; then
+ echo "error: $1 is not available" >&2
+ exit 1
+ fi
+}
+
+# check for: curl, html2text, tail, sed, fmt
+has_cmd curl
+has_cmd html2text
+has_cmd tail
+has_cmd sed
+has_cmd fmt
+
+if [ $# -ne 1 ]; then
+ usage
+fi
+
+n=$1
+
+# get urls
+urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n $n)"
+
+printf "urls:\n%s\n" "$urls"
+
+if [ -f pg.txt ]; then
+ rm pg.txt
+fi
+
+c=1
+for url in $urls; do
+ echo "processing $url"
+
+ cc=$(printf "%03d" $c)
+
+ curl -L $url | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 >> pg-$cc-one.txt
+ cat pg-$cc-one.txt >> pg.txt
+
+ cp -v pg.txt pg-$cc-all.txt
+ c=$((c+1))
+
+ # don't flood the server
+ sleep 1
+done
+
+echo "done. data in pg.txt"
+
+exit 0
diff --git a/llama.cpp/scripts/get-wikitext-103.sh b/llama.cpp/scripts/get-wikitext-103.sh
new file mode 100755
index 0000000..244a371
--- /dev/null
+++ b/llama.cpp/scripts/get-wikitext-103.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip
+
+echo "Usage:"
+echo ""
+echo " ./llama-perplexity -m model.gguf -f wiki.test.raw [other params]"
+echo ""
+
+exit 0
diff --git a/llama.cpp/scripts/get-wikitext-2.sh b/llama.cpp/scripts/get-wikitext-2.sh
new file mode 100755
index 0000000..67b0b01
--- /dev/null
+++ b/llama.cpp/scripts/get-wikitext-2.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
+unzip wikitext-2-raw-v1.zip
+
+echo "Usage:"
+echo ""
+echo " ./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]"
+echo ""
+
+exit 0
diff --git a/llama.cpp/scripts/get-winogrande.sh b/llama.cpp/scripts/get-winogrande.sh
new file mode 100755
index 0000000..2b48b11
--- /dev/null
+++ b/llama.cpp/scripts/get-winogrande.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/winogrande-debiased-eval.csv
+
+echo "Usage:"
+echo ""
+echo " ./llama-perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]"
+echo ""
+
+exit 0
diff --git a/llama.cpp/scripts/get_chat_template.py b/llama.cpp/scripts/get_chat_template.py
new file mode 100755
index 0000000..b4827b3
--- /dev/null
+++ b/llama.cpp/scripts/get_chat_template.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+'''
+ Fetches the Jinja chat template of a HuggingFace model.
+ If a model has multiple chat templates, you can specify the variant name.
+
+ Syntax:
+ ./scripts/get_chat_template.py model_id [variant]
+
+ Examples:
+ ./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use
+ ./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct
+'''
+
+import json
+import re
+import sys
+
+
+def get_chat_template(model_id, variant=None):
+ try:
+ # Use huggingface_hub library if available.
+ # Allows access to gated models if the user has access and ran `huggingface-cli login`.
+ from huggingface_hub import hf_hub_download
+ with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json"), encoding="utf-8") as f:
+ config_str = f.read()
+ except ImportError:
+ import requests
+ assert re.match(r"^[\w.-]+/[\w.-]+$", model_id), f"Invalid model ID: {model_id}"
+ response = requests.get(f"https://huggingface.co/{model_id}/resolve/main/tokenizer_config.json")
+ if response.status_code == 401:
+ raise Exception('Access to this model is gated, please request access, authenticate with `huggingface-cli login` and make sure to run `pip install huggingface_hub`')
+ response.raise_for_status()
+ config_str = response.text
+
+ try:
+ config = json.loads(config_str)
+ except json.JSONDecodeError:
+ # Fix https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
+ # (Remove extra '}' near the end of the file)
+ config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))
+
+ chat_template = config['chat_template']
+ if isinstance(chat_template, str):
+ return chat_template
+ else:
+ variants = {
+ ct['name']: ct['template']
+ for ct in chat_template
+ }
+
+ def format_variants():
+ return ', '.join(f'"{v}"' for v in variants.keys())
+
+ if variant is None:
+ if 'default' not in variants:
+ raise Exception(f'Please specify a chat template variant (one of {format_variants()})')
+ variant = 'default'
+ sys.stderr.write(f'Note: picked "default" chat template variant (out of {format_variants()})\n')
+ elif variant not in variants:
+ raise Exception(f"Variant {variant} not found in chat template (found {format_variants()})")
+
+ return variants[variant]
+
+
+def main(args):
+ if len(args) < 1:
+ raise ValueError("Please provide a model ID and an optional variant name")
+ model_id = args[0]
+ variant = None if len(args) < 2 else args[1]
+
+ template = get_chat_template(model_id, variant)
+ sys.stdout.write(template)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/llama.cpp/scripts/hf.sh b/llama.cpp/scripts/hf.sh
new file mode 100755
index 0000000..e41b905
--- /dev/null
+++ b/llama.cpp/scripts/hf.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+#
+# Shortcut for downloading HF models
+#
+# Usage:
+# ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+# ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+# ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
+#
+
+# all logs go to stderr
+function log {
+ echo "$@" 1>&2
+}
+
+function usage {
+ log "Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [--outdir <dir> [-h|--help]"
+ exit 1
+}
+
+# check for curl or wget
+function has_cmd {
+ if ! [ -x "$(command -v $1)" ]; then
+ return 1
+ fi
+}
+
+if has_cmd wget; then
+ cmd="wget -q -c -O %s/%s %s"
+elif has_cmd curl; then
+ cmd="curl -C - -f --output-dir %s -o %s -L %s"
+else
+ log "[E] curl or wget not found"
+ exit 1
+fi
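+# the %s placeholders above are filled in later via printf: output dir, file name, URL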
+
+url=""
+repo=""
+file=""
+outdir="."
+
+# parse args
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --url)
+ url="$2"
+ shift 2
+ ;;
+ --repo)
+ repo="$2"
+ shift 2
+ ;;
+ --file)
+ file="$2"
+ shift 2
+ ;;
+ --outdir)
+ outdir="$2"
+ shift 2
+ ;;
+ -h|--help)
+ usage
+ ;;
+ *)
+ url="$1"
+ shift
+ ;;
+ esac
+done
+
+if [ -n "$repo" ] && [ -n "$file" ]; then
+ url="https://huggingface.co/$repo/resolve/main/$file"
+fi
+
+if [ -z "$url" ]; then
+ log "[E] missing --url"
+ usage
+fi
+
+# check if the URL is a HuggingFace model, and if so, try to download it
+is_url=false
+
+if [[ ${#url} -gt 22 ]]; then
+ if [[ ${url:0:22} == "https://huggingface.co" ]]; then
+ is_url=true
+ fi
+fi
+
+if [ "$is_url" = false ]; then
+ log "[E] invalid URL, must start with https://huggingface.co"
+ exit 0
+fi
+
+# replace "blob/main" with "resolve/main"
+url=${url/blob\/main/resolve\/main}
+
+basename=$(basename $url)
+
+log "[+] attempting to download $basename"
+
+if [ -n "$cmd" ]; then
+ cmd=$(printf "$cmd" "$outdir" "$basename" "$url")
+ log "[+] $cmd"
+ if $cmd; then
+ echo $outdir/$basename
+ exit 0
+ fi
+fi
+
+log "[-] failed to download"
+
+exit 1
diff --git a/llama.cpp/scripts/install-oneapi.bat b/llama.cpp/scripts/install-oneapi.bat
new file mode 100644
index 0000000..e99bef1
--- /dev/null
+++ b/llama.cpp/scripts/install-oneapi.bat
@@ -0,0 +1,19 @@
+:: MIT license
+:: Copyright (C) 2024 Intel Corporation
+:: SPDX-License-Identifier: MIT
+
+
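+:: Usage (illustrative): install-oneapi.bat <oneAPI installer URL> [component list]
+:: When no component list is given, the bootstrapper installs its default selection.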
+set URL=%1
+set COMPONENTS=%2
+
+curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5
+start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log
+del %TEMP%\webimage.exe
+if "%COMPONENTS%"=="" (
+ webimage_extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
+) else (
+ webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
+)
+set installer_exit_code=%ERRORLEVEL%
+rd /s/q "webimage_extracted"
+exit /b %installer_exit_code%
diff --git a/llama.cpp/scripts/jinja/jinja-tester.py b/llama.cpp/scripts/jinja/jinja-tester.py
new file mode 100755
index 0000000..a489305
--- /dev/null
+++ b/llama.cpp/scripts/jinja/jinja-tester.py
@@ -0,0 +1,504 @@
+#!/usr/bin/env python3
+import sys
+import json
+import argparse
+import jinja2.ext as jinja2_ext
+from PySide6.QtWidgets import (
+ QApplication,
+ QMainWindow,
+ QWidget,
+ QVBoxLayout,
+ QHBoxLayout,
+ QLabel,
+ QPlainTextEdit,
+ QTextEdit,
+ QPushButton,
+ QFileDialog,
+)
+from PySide6.QtGui import QColor, QColorConstants, QTextCursor, QTextFormat
+from PySide6.QtCore import Qt, QRect, QSize
+from jinja2 import TemplateSyntaxError
+from jinja2.sandbox import ImmutableSandboxedEnvironment
+from datetime import datetime
+
+
+def format_template_content(template_content):
+ """Format the Jinja template content using Jinja2's lexer."""
+ if not template_content.strip():
+ return template_content
+
+ env = ImmutableSandboxedEnvironment()
+ tc_rstrip = template_content.rstrip()
+ tokens = list(env.lex(tc_rstrip))
+ result = ""
+ indent_level = 0
+ i = 0
+
+ while i < len(tokens):
+ token = tokens[i]
+ _, token_type, token_value = token
+
+ if token_type == "block_begin":
+ block_start = i
+ # Collect all tokens for this block construct
+ construct_content = token_value
+ end_token_type = token_type.replace("_begin", "_end")
+ j = i + 1
+ while j < len(tokens) and tokens[j][1] != end_token_type:
+ construct_content += tokens[j][2]
+ j += 1
+
+ if j < len(tokens): # Found the end token
+ construct_content += tokens[j][2]
+ i = j # Skip to the end token
+
+ # Check for control structure keywords for indentation
+ stripped_content = construct_content.strip()
+ instr = block_start + 1
+ while tokens[instr][1] == "whitespace":
+ instr = instr + 1
+
+ instruction_token = tokens[instr][2]
+ start_control_tokens = ["if", "for", "macro", "call", "block"]
+ end_control_tokens = ["end" + t for t in start_control_tokens]
+ is_control_start = any(
+ instruction_token.startswith(kw) for kw in start_control_tokens
+ )
+ is_control_end = any(
+ instruction_token.startswith(kw) for kw in end_control_tokens
+ )
+
+ # Adjust indentation for control structures
+ # For control end blocks, decrease indent BEFORE adding the content
+ if is_control_end:
+ indent_level = max(0, indent_level - 1)
+
+ # Remove all previous whitespace before this block
+ result = result.rstrip()
+
+ # Add proper indent, but only if this is not the first token
+ added_newline = False
+ if result: # Only add newline and indent if there's already content
+ result += (
+ "\n" + " " * indent_level
+ ) # Use 2 spaces per indent level
+ added_newline = True
+ else: # For the first token, don't add any indent
+ result += ""
+
+ # Add the block content
+ result += stripped_content
+
+ # Add '-' after '%' if it wasn't there and we added a newline or indent
+ if (
+ added_newline
+ and stripped_content.startswith("{%")
+ and not stripped_content.startswith("{%-")
+ ):
+ # Add '-' at the beginning
+ result = (
+ result[: result.rfind("{%")]
+ + "{%-"
+ + result[result.rfind("{%") + 2 :]
+ )
+ if stripped_content.endswith("%}") and not stripped_content.endswith(
+ "-%}"
+ ):
+ # Only add '-' if this is not the last token or if there's content after
+ if i + 1 < len(tokens) and tokens[i + 1][1] != "eof":
+ result = result[:-2] + "-%}"
+
+ # For control start blocks, increase indent AFTER adding the content
+ if is_control_start:
+ indent_level += 1
+ else:
+ # Malformed template, just add the token
+ result += token_value
+ elif token_type == "variable_begin":
+ # Collect all tokens for this variable construct
+ construct_content = token_value
+ end_token_type = token_type.replace("_begin", "_end")
+ j = i + 1
+ while j < len(tokens) and tokens[j][1] != end_token_type:
+ construct_content += tokens[j][2]
+ j += 1
+
+ if j < len(tokens): # Found the end token
+ construct_content += tokens[j][2]
+ i = j # Skip to the end token
+
+ # For variable constructs, leave them alone
+ # Do not add indent or whitespace before or after them
+ result += construct_content
+ else:
+ # Malformed template, just add the token
+ result += token_value
+ elif token_type == "data":
+ # Handle data (text between Jinja constructs)
+ # For data content, preserve it as is
+ result += token_value
+ else:
+ # Handle any other tokens
+ result += token_value
+
+ i += 1
+
+ # Clean up trailing newlines and spaces
+ result = result.rstrip()
+
+ # Copy the newline / space count from the original
+ if (trailing_length := len(template_content) - len(tc_rstrip)):
+ result += template_content[-trailing_length:]
+
+ return result
+
+
+# ------------------------
+# Line Number Widget
+# ------------------------
+class LineNumberArea(QWidget):
+ def __init__(self, editor):
+ super().__init__(editor)
+ self.code_editor = editor
+
+ def sizeHint(self):
+ return QSize(self.code_editor.line_number_area_width(), 0)
+
+ def paintEvent(self, event):
+ self.code_editor.line_number_area_paint_event(event)
+
+
+class CodeEditor(QPlainTextEdit):
+ def __init__(self):
+ super().__init__()
+ self.line_number_area = LineNumberArea(self)
+
+ self.blockCountChanged.connect(self.update_line_number_area_width)
+ self.updateRequest.connect(self.update_line_number_area)
+ self.cursorPositionChanged.connect(self.highlight_current_line)
+
+ self.update_line_number_area_width(0)
+ self.highlight_current_line()
+
+ def line_number_area_width(self):
+ digits = len(str(self.blockCount()))
+ space = 3 + self.fontMetrics().horizontalAdvance("9") * digits
+ return space
+
+ def update_line_number_area_width(self, _):
+ self.setViewportMargins(self.line_number_area_width(), 0, 0, 0)
+
+ def update_line_number_area(self, rect, dy):
+ if dy:
+ self.line_number_area.scroll(0, dy)
+ else:
+ self.line_number_area.update(
+ 0, rect.y(), self.line_number_area.width(), rect.height()
+ )
+
+ if rect.contains(self.viewport().rect()):
+ self.update_line_number_area_width(0)
+
+ def resizeEvent(self, event):
+ super().resizeEvent(event)
+ cr = self.contentsRect()
+ self.line_number_area.setGeometry(
+ QRect(cr.left(), cr.top(), self.line_number_area_width(), cr.height())
+ )
+
+ def line_number_area_paint_event(self, event):
+ from PySide6.QtGui import QPainter
+
+ painter = QPainter(self.line_number_area)
+ painter.fillRect(event.rect(), QColorConstants.LightGray)
+
+ block = self.firstVisibleBlock()
+ block_number = block.blockNumber()
+ top = int(
+ self.blockBoundingGeometry(block).translated(self.contentOffset()).top()
+ )
+ bottom = top + int(self.blockBoundingRect(block).height())
+
+ while block.isValid() and top <= event.rect().bottom():
+ if block.isVisible() and bottom >= event.rect().top():
+ number = str(block_number + 1)
+ painter.setPen(QColorConstants.Black)
+ painter.drawText(
+ 0,
+ top,
+ self.line_number_area.width() - 2,
+ self.fontMetrics().height(),
+ Qt.AlignmentFlag.AlignRight,
+ number,
+ )
+ block = block.next()
+ top = bottom
+ bottom = top + int(self.blockBoundingRect(block).height())
+ block_number += 1
+
+ def highlight_current_line(self):
+ extra_selections = []
+ if not self.isReadOnly():
+ selection = QTextEdit.ExtraSelection()
+ line_color = QColorConstants.Yellow.lighter(160)
+ selection.format.setBackground(line_color) # pyright: ignore[reportAttributeAccessIssue]
+ selection.format.setProperty(QTextFormat.Property.FullWidthSelection, True) # pyright: ignore[reportAttributeAccessIssue]
+ selection.cursor = self.textCursor() # pyright: ignore[reportAttributeAccessIssue]
+ selection.cursor.clearSelection() # pyright: ignore[reportAttributeAccessIssue]
+ extra_selections.append(selection)
+ self.setExtraSelections(extra_selections)
+
+ def highlight_position(self, lineno: int, col: int, color: QColor):
+ block = self.document().findBlockByLineNumber(lineno - 1)
+ if block.isValid():
+ cursor = QTextCursor(block)
+ text = block.text()
+ start = block.position() + max(0, col - 1)
+ cursor.setPosition(start)
+ if col <= len(text):
+ cursor.movePosition(
+ QTextCursor.MoveOperation.NextCharacter,
+ QTextCursor.MoveMode.KeepAnchor,
+ )
+
+ extra = QTextEdit.ExtraSelection()
+ extra.format.setBackground(color.lighter(160)) # pyright: ignore[reportAttributeAccessIssue]
+ extra.cursor = cursor # pyright: ignore[reportAttributeAccessIssue]
+
+ self.setExtraSelections(self.extraSelections() + [extra])
+
+ def highlight_line(self, lineno: int, color: QColor):
+ block = self.document().findBlockByLineNumber(lineno - 1)
+ if block.isValid():
+ cursor = QTextCursor(block)
+ cursor.select(QTextCursor.SelectionType.LineUnderCursor)
+
+ extra = QTextEdit.ExtraSelection()
+ extra.format.setBackground(color.lighter(160)) # pyright: ignore[reportAttributeAccessIssue]
+ extra.cursor = cursor # pyright: ignore[reportAttributeAccessIssue]
+
+ self.setExtraSelections(self.extraSelections() + [extra])
+
+ def clear_highlighting(self):
+ self.highlight_current_line()
+
+
+# ------------------------
+# Main App
+# ------------------------
+class JinjaTester(QMainWindow):
+ def __init__(self):
+ super().__init__()
+ self.setWindowTitle("Jinja Template Tester")
+ self.resize(1200, 800)
+
+ central = QWidget()
+ main_layout = QVBoxLayout(central)
+
+ # -------- Top input area --------
+ input_layout = QHBoxLayout()
+
+ # Template editor with label
+ template_layout = QVBoxLayout()
+ template_label = QLabel("Jinja2 Template")
+ template_layout.addWidget(template_label)
+ self.template_edit = CodeEditor()
+ template_layout.addWidget(self.template_edit)
+ input_layout.addLayout(template_layout)
+
+ # JSON editor with label
+ json_layout = QVBoxLayout()
+ json_label = QLabel("Context (JSON)")
+ json_layout.addWidget(json_label)
+ self.json_edit = CodeEditor()
+ self.json_edit.setPlainText("""
+{
+ "add_generation_prompt": true,
+ "bos_token": "",
+ "eos_token": "",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of Poland?"
+ }
+ ]
+}
+ """.strip())
+ json_layout.addWidget(self.json_edit)
+ input_layout.addLayout(json_layout)
+
+ main_layout.addLayout(input_layout)
+
+ # -------- Rendered output area --------
+ output_label = QLabel("Rendered Output")
+ main_layout.addWidget(output_label)
+ self.output_edit = QPlainTextEdit()
+ self.output_edit.setReadOnly(True)
+ main_layout.addWidget(self.output_edit)
+
+ # -------- Render button and status --------
+ btn_layout = QHBoxLayout()
+
+ # Load template button
+ self.load_btn = QPushButton("Load Template")
+ self.load_btn.clicked.connect(self.load_template)
+ btn_layout.addWidget(self.load_btn)
+
+ # Format template button
+ self.format_btn = QPushButton("Format")
+ self.format_btn.clicked.connect(self.format_template)
+ btn_layout.addWidget(self.format_btn)
+
+ self.render_btn = QPushButton("Render")
+ self.render_btn.clicked.connect(self.render_template)
+ btn_layout.addWidget(self.render_btn)
+ main_layout.addLayout(btn_layout)
+
+ # Status label below buttons
+ self.status_label = QLabel("Ready")
+ main_layout.addWidget(self.status_label)
+
+ self.setCentralWidget(central)
+
+ def render_template(self):
+ self.template_edit.clear_highlighting()
+ self.output_edit.clear()
+
+ template_str = self.template_edit.toPlainText()
+ json_str = self.json_edit.toPlainText()
+
+ # Parse JSON context
+ try:
+ context = json.loads(json_str) if json_str.strip() else {}
+ except Exception as e:
+ self.status_label.setText(f"❌ JSON Error: {e}")
+ return
+
+ def raise_exception(text: str) -> str:
+ raise RuntimeError(text)
+
+ env = ImmutableSandboxedEnvironment(
+ trim_blocks=True,
+ lstrip_blocks=True,
+ extensions=[jinja2_ext.loopcontrols],
+ )
+ env.filters["tojson"] = (
+ lambda x,
+ indent=None,
+ separators=None,
+ sort_keys=False,
+ ensure_ascii=False: json.dumps(
+ x,
+ indent=indent,
+ separators=separators,
+ sort_keys=sort_keys,
+ ensure_ascii=ensure_ascii,
+ )
+ )
+ env.globals["strftime_now"] = lambda format: datetime.now().strftime(format)
+ env.globals["raise_exception"] = raise_exception
+ try:
+ template = env.from_string(template_str)
+ output = template.render(context)
+ self.output_edit.setPlainText(output)
+ self.status_label.setText("✅ Render successful")
+ except TemplateSyntaxError as e:
+ self.status_label.setText(f"❌ Syntax Error (line {e.lineno}): {e.message}")
+ if e.lineno:
+ self.template_edit.highlight_line(e.lineno, QColor("red"))
+ except Exception as e:
+ # Catch all runtime errors
+ # Try to extract template line number
+ lineno = None
+ tb = e.__traceback__
+ while tb:
+ frame = tb.tb_frame
+ if frame.f_code.co_filename == "<template>":
+ lineno = tb.tb_lineno
+ break
+ tb = tb.tb_next
+
+ error_msg = f"Runtime Error: {type(e).__name__}: {e}"
+ if lineno:
+ error_msg = f"Runtime Error at line {lineno} in template: {type(e).__name__}: {e}"
+ self.template_edit.highlight_line(lineno, QColor("orange"))
+
+ self.output_edit.setPlainText(error_msg)
+ self.status_label.setText(f"❌ {error_msg}")
+
+ def load_template(self):
+ """Load a Jinja template from a file using a file dialog."""
+ file_path, _ = QFileDialog.getOpenFileName(
+ self,
+ "Load Jinja Template",
+ "",
+ "Template Files (*.jinja *.j2 *.html *.txt);;All Files (*)",
+ )
+
+ if file_path:
+ try:
+ with open(file_path, "r", encoding="utf-8") as file:
+ content = file.read()
+ self.template_edit.setPlainText(content)
+ self.status_label.setText(f"✅ Loaded template from {file_path}")
+ except Exception as e:
+ self.status_label.setText(f"❌ Error loading file: {str(e)}")
+
+ def format_template(self):
+ """Format the Jinja template using Jinja2's lexer for proper parsing."""
+ try:
+ template_content = self.template_edit.toPlainText()
+ if not template_content.strip():
+ self.status_label.setText("⚠️ Template is empty")
+ return
+
+ formatted_content = format_template_content(template_content)
+ self.template_edit.setPlainText(formatted_content)
+ self.status_label.setText("✅ Template formatted")
+ except Exception as e:
+ self.status_label.setText(f"❌ Error formatting template: {str(e)}")
+
+
+if __name__ == "__main__":
+ if len(sys.argv) > 1:
+ # CLI mode
+ parser = argparse.ArgumentParser(description="Jinja Template Tester")
+ parser.add_argument(
+ "--template", required=True, help="Path to Jinja template file"
+ )
+ parser.add_argument("--context", required=True, help="JSON string for context")
+ parser.add_argument(
+ "--action",
+ choices=["format", "render"],
+ default="render",
+ help="Action to perform",
+ )
+ args = parser.parse_args()
+
+ # Load template
+ with open(args.template, "r", encoding="utf-8") as f:
+ template_content = f.read()
+
+ # Load JSON
+ context = json.loads(args.context)
+ # Add missing variables
+ context.setdefault("bos_token", "")
+ context.setdefault("eos_token", "")
+ context.setdefault("add_generation_prompt", False)
+
+ env = ImmutableSandboxedEnvironment()
+
+ if args.action == "format":
+ formatted = format_template_content(template_content)
+ print(formatted) # noqa: NP100
+ elif args.action == "render":
+ template = env.from_string(template_content)
+ output = template.render(context)
+ print(output) # noqa: NP100
+
+ else:
+ # GUI mode
+ app = QApplication(sys.argv)
+ window = JinjaTester()
+ window.show()
+ sys.exit(app.exec())
diff --git a/llama.cpp/scripts/jinja/requirements.txt b/llama.cpp/scripts/jinja/requirements.txt
new file mode 100644
index 0000000..253685b
--- /dev/null
+++ b/llama.cpp/scripts/jinja/requirements.txt
@@ -0,0 +1,2 @@
+PySide6
+jinja2
diff --git a/llama.cpp/scripts/pr2wt.sh b/llama.cpp/scripts/pr2wt.sh
new file mode 100755
index 0000000..bd635f3
--- /dev/null
+++ b/llama.cpp/scripts/pr2wt.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+
+# initialize a new worktree from a PR number:
+#
+# - creates a new remote using the fork's clone URL
+# - creates a local branch tracking the remote branch
+# - creates a new worktree in a parent folder, suffixed with "-pr-$PR"
+#
+# sample usage:
+# ./scripts/pr2wt.sh 12345
+# ./scripts/pr2wt.sh 12345 opencode
+# ./scripts/pr2wt.sh 12345 "cmake -B build && cmake --build build"
+# ./scripts/pr2wt.sh 12345 "bash -l"
+
+function usage() {
+ echo "usage: $0 <pr_number> [cmd]"
+ exit 1
+}
+
+# check we are in the right directory
+if [[ ! -f "scripts/pr2wt.sh" ]]; then
+ echo "error: this script must be run from the root of the repository"
+ exit 1
+fi
+
+if [[ $# -lt 1 || $# -gt 2 ]]; then
+ usage
+fi
+
+PR=$1
+[[ "$PR" =~ ^[0-9]+$ ]] || { echo "error: PR number must be numeric"; exit 1; }
+
+url_origin=$(git config --get remote.origin.url) || {
+ echo "error: no remote named 'origin' in this repository"
+ exit 1
+}
+
+org_repo=$(echo $url_origin | cut -d/ -f4-)
+org_repo=${org_repo%.git}
+
+echo "org/repo: $org_repo"
+
+meta=$(curl -sSLf -H "Accept: application/vnd.github+json" "https://api.github.com/repos/$org_repo/pulls/$PR")
+
+url_remote=$(echo "$meta" | jq -r '.head.repo.clone_url')
+head_ref=$(echo "$meta" | jq -r '.head.ref')
+
+echo "url: $url_remote"
+echo "head_ref: $head_ref"
+
+url_remote_cur=$(git config --get "remote.pr/$PR.url" 2>/dev/null || true)
+
+if [[ "$url_remote_cur" != "$url_remote" ]]; then
+ git remote rm pr/$PR 2> /dev/null
+ git remote add pr/$PR "$url_remote"
+fi
+
+git fetch "pr/$PR" "$head_ref"
+
+dir=$(basename $(pwd))
+
+git branch -D pr/$PR 2> /dev/null
+git worktree add -b pr/$PR ../$dir-pr-$PR pr/$PR/$head_ref 2> /dev/null
+
+wt_path=$(cd ../$dir-pr-$PR && pwd)
+
+echo "git worktree created in $wt_path"
+
+cd $wt_path
+git branch --set-upstream-to=pr/$PR/$head_ref
+git pull --ff-only || {
+ echo "error: failed to pull pr/$PR"
+ exit 1
+}
+
+if [[ $# -eq 2 ]]; then
+ echo "executing: $2"
+ eval "$2"
+fi
diff --git a/llama.cpp/scripts/serve-static.js b/llama.cpp/scripts/serve-static.js
new file mode 100644
index 0000000..df4953e
--- /dev/null
+++ b/llama.cpp/scripts/serve-static.js
@@ -0,0 +1,110 @@
+const http = require('http');
+const fs = require('fs').promises;
+const path = require('path');
+
+// This file is used for testing the wasm build from Emscripten.
+// Example build command:
+// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_OPENSSL=OFF
+// cmake --build build-wasm --target test-backend-ops -j
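+// Run with `node scripts/serve-static.js`, then open http://localhost:8080/ in a browser;
+// if no index.html is present, a directory listing of build-wasm/bin is generated below.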
+
+const PORT = 8080;
+const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');
+console.log(`Serving static files from: ${STATIC_DIR}`);
+
+const mimeTypes = {
+ '.html': 'text/html',
+ '.js': 'text/javascript',
+ '.css': 'text/css',
+ '.png': 'image/png',
+ '.jpg': 'image/jpeg',
+ '.gif': 'image/gif',
+ '.svg': 'image/svg+xml',
+ '.json': 'application/json',
+ '.woff': 'font/woff',
+ '.woff2': 'font/woff2',
+};
+
+async function generateDirListing(dirPath, reqUrl) {
+ const files = await fs.readdir(dirPath);
+ let html = `
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>Directory Listing</title>
+ <style>
+ body { font-family: Arial, sans-serif; padding: 20px; }
+ ul { list-style: none; padding: 0; }
+ li { margin: 5px 0; }
+ a { text-decoration: none; color: #0066cc; }
+ a:hover { text-decoration: underline; }
+ </style>
+ </head>
+ <body>
+ <h1>Directory: ${reqUrl}</h1>
+ <ul>
+ `;
+
+ if (reqUrl !== '/') {
+ html += `<li><a href="../">../ (Parent Directory)</a></li>`;
+ }
+
+ for (const file of files) {
+ const filePath = path.join(dirPath, file);
+ const stats = await fs.stat(filePath);
+ const link = encodeURIComponent(file) + (stats.isDirectory() ? '/' : '');
+ html += `<li><a href="${link}">${file}${stats.isDirectory() ? '/' : ''}</a></li>`;
+ }
+
+ html += `
+ </ul>
+ </body>
+ </html>
+ `;
+ return html;
+}
+
+const server = http.createServer(async (req, res) => {
+ try {
+ // Set COOP and COEP headers
+ res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
+ res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
+ res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
+ res.setHeader('Pragma', 'no-cache');
+ res.setHeader('Expires', '0');
+
+ const filePath = path.join(STATIC_DIR, decodeURIComponent(req.url));
+ const stats = await fs.stat(filePath);
+
+ if (stats.isDirectory()) {
+ const indexPath = path.join(filePath, 'index.html');
+ try {
+ const indexData = await fs.readFile(indexPath);
+ res.writeHeader(200, { 'Content-Type': 'text/html' });
+ res.end(indexData);
+ } catch {
+ // No index.html, generate directory listing
+ const dirListing = await generateDirListing(filePath, req.url);
+ res.writeHeader(200, { 'Content-Type': 'text/html' });
+ res.end(dirListing);
+ }
+ } else {
+ const ext = path.extname(filePath).toLowerCase();
+ const contentType = mimeTypes[ext] || 'application/octet-stream';
+ const data = await fs.readFile(filePath);
+ res.writeHeader(200, { 'Content-Type': contentType });
+ res.end(data);
+ }
+ } catch (err) {
+ if (err.code === 'ENOENT') {
+ res.writeHeader(404, { 'Content-Type': 'text/plain' });
+ res.end('404 Not Found');
+ } else {
+ res.writeHeader(500, { 'Content-Type': 'text/plain' });
+ res.end('500 Internal Server Error');
+ }
+ }
+});
+
+server.listen(PORT, () => {
+ console.log(`Server running at http://localhost:${PORT}/`);
+});
diff --git a/llama.cpp/scripts/server-bench.py b/llama.cpp/scripts/server-bench.py
new file mode 100755
index 0000000..dbbb093
--- /dev/null
+++ b/llama.cpp/scripts/server-bench.py
@@ -0,0 +1,297 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+import os
+import random
+import sqlite3
+import subprocess
+from time import sleep, time
+from typing import Optional, Union
+
+import datasets
+import logging
+import matplotlib.pyplot as plt
+import numpy as np
+import requests
+from tqdm.contrib.concurrent import thread_map
+
+
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger("server-bench")
+
+
+def get_prompts_text(dataset_name: str, n_prompts: int) -> Optional[list[str]]:
+ ret = []
+ if dataset_name.lower() == "mmlu":
+ logger.info("Loading MMLU dataset...")
+ ret = datasets.load_dataset("cais/mmlu", "all")["test"]["question"] # type: ignore
+ else:
+ return None
+ if n_prompts >= 0:
+ ret = ret[:n_prompts]
+ return ret
+
+
+def get_prompt_lengths_rng(n_prompts: int, prompt_length_min: int, prompt_length_max: int, seed_offset: int) -> list[int]:
+ assert n_prompts >= 0
+ ret: list[int] = []
+ for i in range(n_prompts):
+ if seed_offset >= 0:
+ random.seed(3 * (seed_offset + 1000 * i) + 0)
+ ret.append(random.randint(prompt_length_min, prompt_length_max))
+ return ret
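+# Note: seeds of the form 3 * (seed_offset + 1000 * i) + k (k = 0, 1, 2 here and at the
+# call sites in benchmark() below) keep the per-prompt draws for prompt length, n_predict
+# and the sampling seed on disjoint, reproducible streams.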
+
+
+def get_prompts_rng(prompt_lengths: list[int]) -> list[list[int]]:
+ return [[random.randint(100, 10000) for _ in range(pl)] for pl in prompt_lengths]
+
+
+def get_server(path_server: str, path_log: Optional[str]) -> dict:
+ if path_server.startswith("http://") or path_server.startswith("https://"):
+ return {"process": None, "address": path_server, "fout": None}
+ if os.environ.get("LLAMA_ARG_HOST") is None:
+ logger.info("LLAMA_ARG_HOST not explicitly set, using 127.0.0.1")
+ os.environ["LLAMA_ARG_HOST"] = "127.0.0.1"
+ if os.environ.get("LLAMA_ARG_PORT") is None:
+ logger.info("LLAMA_ARG_PORT not explicitly set, using 8080")
+ os.environ["LLAMA_ARG_PORT"] = "8080"
+ hostname: Optional[str] = os.environ.get("LLAMA_ARG_HOST")
+ port: Optional[str] = os.environ.get("LLAMA_ARG_PORT")
+ assert hostname is not None
+ assert port is not None
+ address: str = f"http://{hostname}:{port}"
+ logger.info(f"Starting the llama.cpp server under {address}...")
+
+ fout = open(path_log.format(port=port), "w") if path_log is not None else subprocess.DEVNULL
+ process = subprocess.Popen([path_server], stdout=fout, stderr=subprocess.STDOUT)
+
+ n_failures: int = 0
+ while True:
+ try:
+ sleep(1.0)
+ exit_code = process.poll()
+ if exit_code is not None:
+ raise RuntimeError(f"llama.cpp server exited unexpectedly with exit code {exit_code}{path_log and f', see {path_log.format(port=port)}' or ''}")
+ response = requests.get(f"{address}/health")
+ if response.status_code == 200:
+ break
+ except requests.ConnectionError:
+ n_failures += 1
+ if n_failures >= 10:
+ raise RuntimeError("llama.cpp server is not healthy after 10 seconds")
+
+ return {"process": process, "address": address, "fout": fout}
+
+
+def get_prompt_length(data: dict) -> int:
+ session = data["session"]
+ server_address: str = data["server_address"]
+
+ response = session.post(
+ f"{server_address}/apply-template",
+ json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
+ )
+ response.raise_for_status()
+ prompt: str = json.loads(response.text)["prompt"]
+ response = session.post(
+ f"{server_address}/tokenize",
+ json={"content": prompt, "add_special": True}
+ )
+ response.raise_for_status()
+ tokens: list[str] = json.loads(response.text)["tokens"]
+ return len(tokens)
+
+
+def send_prompt(data: dict) -> tuple[float, list[float]]:
+ session = data["session"]
+ server_address: str = data["server_address"]
+
+ t_submit = time()
+ if data["external_server"]:
+ json_data: dict = {
+ "prompt": data["prompt"], "ignore_eos": True,
+ "seed": data["seed"], "max_tokens": data["n_predict"], "stream": True}
+ response = session.post(f"{server_address}/v1/completions", json=json_data, stream=True)
+ elif data["synthetic_prompt"]:
+ json_data: dict = {
+ "prompt": data["prompt"], "ignore_eos": True, "cache_prompt": False,
+ "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}
+ response = session.post(f"{server_address}/completion", json=json_data, stream=True)
+ else:
+ response = session.post(
+ f"{server_address}/apply-template",
+ json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
+ )
+ response.raise_for_status()
+ prompt: str = json.loads(response.text)["prompt"]
+
+ json_data: dict = {"prompt": prompt, "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}
+ response = session.post(f"{server_address}/completion", json=json_data, stream=True)
+ response.raise_for_status()
+
+ lines = []
+ token_arrival_times: list[float] = []
+ for line in response.iter_lines(decode_unicode=False):
+ if not line.startswith(b"data: "):
+ continue
+ lines.append(line)
+ token_arrival_times.append(time())
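+    # The final streamed line marks the end of generation (and the second-to-last may be a timings summary); neither is a generated token, so their arrival times are dropped below.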
+ token_arrival_times = token_arrival_times[:-1]
+ if len(lines) > 1 and "timings" in json.loads(lines[-2][6:]):
+ token_arrival_times = token_arrival_times[:-1]
+
+ return (t_submit, token_arrival_times)
+
+
+def benchmark(
+ path_server: str, path_log: Optional[str], path_db: Optional[str], name: Optional[str], prompt_source: str, n_prompts: int,
+ n_predict: int, n_predict_min: int, seed_offset: int):
+ external_server: bool = path_server.startswith("http://") or path_server.startswith("https://")
+ if os.environ.get("LLAMA_ARG_N_PARALLEL") is None:
+ logger.info("LLAMA_ARG_N_PARALLEL not explicitly set, using 32")
+ os.environ["LLAMA_ARG_N_PARALLEL"] = "32"
+
+ parallel: int = int(os.environ.get("LLAMA_ARG_N_PARALLEL")) # type: ignore
+ prompts: Union[None, list[str], list[list[int]]] = get_prompts_text(prompt_source, n_prompts)
+ synthetic_prompts: bool = prompts is None
+ prompt_n = []
+
+ if synthetic_prompts:
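+        # Synthetic prompts are requested as "rng-MIN-MAX", e.g. "rng-1024-2048" for prompts of 1024-2048 random tokens each.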
+ prompt_source_split: list[str] = prompt_source.split("-")
+ assert len(prompt_source_split) == 3
+ assert prompt_source_split[0].lower() == "rng"
+ prompt_length_min: int = int(prompt_source_split[1])
+ prompt_length_max: int = int(prompt_source_split[2])
+ logger.info("Generating random prompts...")
+ prompt_n = get_prompt_lengths_rng(n_prompts, prompt_length_min, prompt_length_max, seed_offset)
+ prompts = get_prompts_rng(prompt_n)
+ else:
+ n_predict_min = n_predict
+
+ if not external_server and os.environ.get("LLAMA_ARG_CTX_SIZE") is None:
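+        # Per-slot context: predicted tokens plus the longest synthetic prompt (or a 2048-token allowance for dataset prompts), with a 5% safety margin.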
+ context_per_slot: int = int(1.05 * (n_predict + (np.max(prompt_n) if synthetic_prompts else 2048)))
+ context_total: int = context_per_slot * parallel
+ os.environ["LLAMA_ARG_CTX_SIZE"] = str(context_total)
+ logger.info(f"LLAMA_ARG_CTX_SIZE not explicitly set, using {context_total} ({context_per_slot} per slot).")
+
+ server: Optional[dict] = None
+ session = None
+ try:
+ server = get_server(path_server, path_log)
+ server_address: str = server["address"]
+ assert external_server == (server["process"] is None)
+
+ adapter = requests.adapters.HTTPAdapter(pool_connections=parallel, pool_maxsize=parallel) # type: ignore
+ session = requests.Session()
+ session.mount("http://", adapter)
+ session.mount("https://", adapter)
+
+ data: list[dict] = []
+
+ for i, p in enumerate(prompts):
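+            # Per-prompt seeds are derived from seed_offset using distinct residues mod 3: 0 for prompt lengths (above), 1 for n_predict, 2 for the generation seed.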
+ if seed_offset >= 0:
+ random.seed(3 * (seed_offset + 1000 * i) + 1)
+ data.append({
+ "session": session, "server_address": server_address, "external_server": external_server, "prompt": p,
+ "synthetic_prompt": synthetic_prompts, "n_predict": random.randint(n_predict_min, n_predict),
+ "seed": (3 * (seed_offset + 1000 * i) + 2) if seed_offset >= 0 else -1})
+
+ if not synthetic_prompts:
+ logger.info("Getting the prompt lengths...")
+ prompt_n = [get_prompt_length(d) for d in data]
+
+ logger.info("Starting the benchmark...\n")
+ t0 = time()
+ results: list[tuple[float, list[float]]] = thread_map(send_prompt, data, max_workers=parallel, chunksize=1)
+ finally:
+ if server is not None and server["process"] is not None:
+ server["process"].terminate()
+ server["process"].wait()
+ if session is not None:
+ session.close()
+
+ prompt_t = []
+ token_t = []
+ depth_sum: int = 0
+ for pn, (t_submit, tat) in zip(prompt_n, results):
+ prompt_t.append(tat[0] - t_submit)
+ token_t += tat
+ n_tokens: int = len(tat)
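+        # Context depth summed over all generated tokens: the full prompt for each token plus an arithmetic series for the tokens generated before it.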
+ depth_sum += n_tokens * pn
+ depth_sum += n_tokens * (n_tokens + 1) // 2
+ assert len(token_t) > 0
+ prompt_n = np.array(prompt_n, dtype=np.int64)
+ prompt_t = np.array(prompt_t, dtype=np.float64)
+ token_t = np.array(token_t, dtype=np.float64)
+
+ token_t -= t0
+ token_t_last = np.max(token_t)
+
+ logger.info("")
+ logger.info(f"Benchmark duration: {token_t_last:.2f} s")
+ logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min")
+ logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens")
+ logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens")
+ logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms")
+ logger.info(f"Average prompt speed: {np.sum(prompt_n) / np.sum(prompt_t):.2f} tokens/s")
+ logger.info(f"Total generated tokens: {token_t.shape[0]}")
+ logger.info(f"Average generation depth: {depth_sum / token_t.shape[0]:.2f} tokens")
+ logger.info(f"Average total generation speed: {token_t.shape[0] / token_t_last:.2f} tokens/s")
+ logger.info(f"Average generation speed per slot: {token_t.shape[0] / (parallel * token_t_last):.2f} tokens/s / slot")
+
+ if path_db is not None:
+ con = sqlite3.connect(path_db)
+ cursor = con.cursor()
+ cursor.execute(
+ "CREATE TABLE IF NOT EXISTS server_bench"
+ "(name TEXT, n_parallel INTEGER, prompt_source TEXT, n_prompts INTEGER, "
+ "n_predict INTEGER, n_predict_min INTEGER, seed_offset INTEGER, runtime REAL);")
+ cursor.execute(
+ "INSERT INTO server_bench VALUES (?, ?, ?, ?, ?, ?, ?, ?);",
+ [name, parallel, prompt_source, n_prompts, n_predict, n_predict_min, seed_offset, token_t_last])
+ con.commit()
+
+ plt.figure()
+ plt.scatter(prompt_n, 1e3 * prompt_t, s=10.0, marker=".", alpha=0.25)
+ plt.xlim(0, 1.05e0 * np.max(prompt_n))
+ plt.ylim(0, 1.05e3 * np.max(prompt_t))
+ plt.title(name or "")
+ plt.xlabel("Prompt length [tokens]")
+ plt.ylabel("Time to first token [ms]")
+ plt.savefig("prompt_time.png", dpi=240)
+
+ bin_max = np.ceil(token_t_last) + 1
+ plt.figure()
+ plt.hist(token_t, np.arange(0, bin_max))
+ plt.xlim(0, bin_max + 1)
+ plt.title(name or "")
+ plt.xlabel("Time [s]")
+ plt.ylabel("Num. tokens generated per second")
+ plt.savefig("gen_rate.png", dpi=240)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Tool for benchmarking the throughput of the llama.cpp HTTP server. "
+        "Results are printed to the console and visualized as plots (saved to the current working directory). "
+ "To pass arguments such as the model path to the server, set the corresponding environment variables (see llama-server --help). "
+ "The reported numbers are the speeds as observed by the Python script and may differ from the performance reported by the server, "
+        "particularly when the server is fast relative to the network or the Python script (e.g. when serving a very small model).")
+ parser.add_argument("--path_server", type=str, default="llama-server", help="Path to the llama.cpp server binary")
+    parser.add_argument("--path_log", type=str, default="server-bench-{port}.log", help="Path to write the server log to; '{port}' is replaced with the server port")
+ parser.add_argument("--path_db", type=str, default=None, help="Path to an sqlite database to store the benchmark results in")
+ parser.add_argument("--name", type=str, default=None, help="Name to label plots and database entries with")
+ parser.add_argument(
+ "--prompt_source", type=str, default="rng-1024-2048",
+ help="How to get the prompts for the benchmark, either 'mmlu' for MMLU questions or "
+ "rng-MIN-MAX for synthetic prompts with random lengths in the interval [MIN, MAX]")
+ parser.add_argument("--n_prompts", type=int, default=100, help="Number of prompts to evaluate")
+ parser.add_argument("--n_predict", type=int, default=2048, help="Max. number of tokens to predict per prompt")
+ parser.add_argument(
+ "--n_predict_min", type=int, default=1024,
+ help="Min. number of tokens to predict per prompt (supported for synthetic prompts only)")
+ parser.add_argument("--seed_offset", type=int, default=0, help="Offset for determining the seeds for pseudorandom prompt/generation lengths. "
+        "Correlations between seeds can occur when it is set >= 1000. Negative values disable seeding.")
+ args = parser.parse_args()
+ benchmark(**vars(args))
diff --git a/llama.cpp/scripts/snapdragon/adb/llama-cli.farf b/llama.cpp/scripts/snapdragon/adb/llama-cli.farf
new file mode 100644
index 0000000..de84fe8
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/llama-cli.farf
@@ -0,0 +1 @@
+0xffff
diff --git a/llama.cpp/scripts/snapdragon/adb/run-bench.sh b/llama.cpp/scripts/snapdragon/adb/run-bench.sh
new file mode 100755
index 0000000..2750860
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/run-bench.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+#
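+# All options are passed via environment variables (see the assignments below):
+#   B=branch dir, S=adb serial, H=adb host, M=model file, D=device (e.g. HTP0),
+#   V/E/PROF/OPMASK/NHVX/NDEV/HB map to the corresponding GGML_HEXAGON_* settings.
+# Example (assumes the model has already been pushed to the on-device gguf directory):
+#   M=Llama-3.2-1B-Instruct-Q4_0.gguf D=HTP0 ./run-bench.sh -p 128 -n 32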
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+experimental=
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+set -x
+
+adb $adbserial $adbhost shell " \
+ cd $basedir; \
+ LD_LIBRARY_PATH=$basedir/$branch/lib \
+ ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+ $ndev $nhvx $opmask $verbose $experimental $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
+ --batch-size 128 -ngl 99 $cli_opts $@ \
+"
diff --git a/llama.cpp/scripts/snapdragon/adb/run-cli.sh b/llama.cpp/scripts/snapdragon/adb/run-cli.sh
new file mode 100755
index 0000000..d19d4e9
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/run-cli.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+experimental=
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+set -x
+
+adb $adbserial $adbhost shell " \
+ cd $basedir; ulimit -c unlimited; \
+ LD_LIBRARY_PATH=$basedir/$branch/lib \
+ ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+ $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb \
+ ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
+ --ctx-size 8192 --batch-size 128 -fa on \
+ -ngl 99 --device $device $cli_opts $@ \
+"
diff --git a/llama.cpp/scripts/snapdragon/adb/run-completion.sh b/llama.cpp/scripts/snapdragon/adb/run-completion.sh
new file mode 100755
index 0000000..da9df11
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/run-completion.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+#
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+experimental=
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
+
+opmask=
+[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+set -x
+
+adb $adbserial $adbhost shell " \
+ cd $basedir; ulimit -c unlimited; \
+ LD_LIBRARY_PATH=$basedir/$branch/lib \
+ ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+ $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb \
+ ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
+ --ctx-size 8192 --batch-size 128 -fa on \
+ -ngl 99 -no-cnv --device $device $cli_opts $@ \
+"
diff --git a/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh b/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh
new file mode 100755
index 0000000..fc018e7
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+#
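+# Same environment options as run-cli.sh, plus MMPROJ (multimodal projector .gguf)
+# and IMG (image file); both are resolved relative to the on-device gguf directory next to the model.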
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+model="gemma-3-4b-it-Q4_0.gguf"
+[ "$M" != "" ] && model="$M"
+
+mmproj="mmproj-F16.gguf"
+[ "$MMPROJ" != "" ] && mmproj="$MMPROJ"
+
+image=
+[ "$IMG" != "" ] && image="$IMG"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
+
+experimental="GGML_HEXAGON_EXPERIMENTAL=1"
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1"
+
+opmask=
+[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+# MTMD backend device for vision model (defaults to CPU if not set)
+mtmd_backend=
+[ "$MTMD_DEVICE" != "" ] && mtmd_backend="MTMD_BACKEND_DEVICE=$MTMD_DEVICE"
+
+set -x
+
+adb $adbserial $adbhost shell " \
+ cd $basedir; ulimit -c unlimited; \
+ LD_LIBRARY_PATH=$basedir/$branch/lib \
+ ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+ $verbose $experimental $sched $opmask $profile $nhvx $ndev $mtmd_backend \
+ ./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model \
+ --mmproj $basedir/../gguf/$mmproj \
+ --image $basedir/../gguf/$image \
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
+ --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on \
+ -ngl 99 --device $device -v $cli_opts $@ \
+"
diff --git a/llama.cpp/scripts/snapdragon/adb/run-tool.sh b/llama.cpp/scripts/snapdragon/adb/run-tool.sh
new file mode 100755
index 0000000..4647ede
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/adb/run-tool.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+#
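+# Runs an arbitrary binary from the on-device package:
+#   [env options] ./run-tool.sh <tool-name> [tool args...]
+# e.g. ./run-tool.sh test-backend-ops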
+
+# Basedir on device
+basedir=/data/local/tmp/llama.cpp
+
+cli_opts=
+
+branch=.
+[ "$B" != "" ] && branch=$B
+
+adbserial=
+[ "$S" != "" ] && adbserial="-s $S"
+
+adbhost=
+[ "$H" != "" ] && adbhost="-H $H"
+
+device="HTP0"
+[ "$D" != "" ] && device="$D"
+
+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
+
+experimental=
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+
+sched=
+[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
+
+profile=
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1"
+
+opmask=
+[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
+
+nhvx=
+[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
+
+ndev=
+[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
+
+hb=
+[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
+
+set -x
+
+tool=$1; shift
+
+adb $adbserial $adbhost shell " \
+ cd $basedir; ulimit -c unlimited; \
+ LD_LIBRARY_PATH=$basedir/$branch/lib \
+ ADSP_LIBRARY_PATH=$basedir/$branch/lib \
+ $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb ./$branch/bin/$tool $@ \
+"
diff --git a/llama.cpp/scripts/snapdragon/qdc/readme.md b/llama.cpp/scripts/snapdragon/qdc/readme.md
new file mode 100644
index 0000000..b92cf24
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/qdc/readme.md
@@ -0,0 +1 @@
+This directory includes pytest-based scripts for running CI jobs on Qualcomm Device Cloud (QDC).
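+Assuming an adb-connected device and a prebuilt `llama.cpp` package in the working directory, the tests can typically be run locally with `pip install -r requirements.txt` followed by `pytest -v tests/`.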
diff --git a/llama.cpp/scripts/snapdragon/qdc/requirements.txt b/llama.cpp/scripts/snapdragon/qdc/requirements.txt
new file mode 100644
index 0000000..f04bd68
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/qdc/requirements.txt
@@ -0,0 +1,25 @@
+Appium-Python-Client==5.2.4
+attrs==25.4.0
+certifi==2025.10.5
+exceptiongroup==1.3.0
+h11==0.16.0
+idna==3.11
+iniconfig==2.1.0
+outcome==1.3.0.post0
+packaging==25.0
+pluggy==1.6.0
+Pygments==2.19.2
+PySocks==1.7.1
+pytest==8.4.2
+pytest-dependency==0.6.0
+selenium==4.36.0
+setuptools==80.9.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+tomli==2.3.0
+trio==0.31.0
+trio-websocket==0.12.2
+typing_extensions==4.15.0
+urllib3==2.5.0
+websocket-client==1.9.0
+wsproto==1.2.0
diff --git a/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py b/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py
new file mode 100644
index 0000000..651ab5b
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py
@@ -0,0 +1,63 @@
+import pytest
+import subprocess
+import sys
+
+tmp_path='/data/local/tmp'
+pkg_path=f'{tmp_path}/llama.cpp'
+lib_path=f'{pkg_path}/lib'
+bin_path=f'{pkg_path}/bin'
+
+model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
+cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
+
+
+def run_cmd(cmd):
+ p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
+ sys.stdout.write(p.stdout)
+ assert(p.returncode == 0)
+
+
+@pytest.mark.dependency()
+def test_install():
+ run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}'])
+ run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])
+
+
+## Basic cli tests
+def run_llama_cli(dev, opts):
+ prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
+ opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts
+ run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"'])
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_cli_cpu():
+ run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_cli_gpu():
+ run_llama_cli('GPUOpenCL', '-fa on')
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_cli_npu():
+ run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')
+
+
+## Basic bench tests
+def run_llama_bench(dev):
+ run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'])
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_bench_cpu():
+ run_llama_bench('none')
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_bench_gpu():
+ run_llama_bench('GPUOpenCL')
+
+
+@pytest.mark.dependency(depends=['test_install'])
+def test_llama_bench_npu():
+ run_llama_bench('HTP0')
diff --git a/llama.cpp/scripts/snapdragon/windows/run-bench.ps1 b/llama.cpp/scripts/snapdragon/windows/run-bench.ps1
new file mode 100644
index 0000000..21fd063
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/windows/run-bench.ps1
@@ -0,0 +1,40 @@
+
+#!/usr/bin/env pwsh
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+$cli_opts=$args
+
+$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+if ($null -ne $env:M) {
+ $model=$env:M
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+ $device=$env:D
+}
+
+if ($null -ne $env:V) {
+ $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:OPMASK) {
+ $env:GGML_HEXAGON_OPMASK=$env:OPMASK
+}
+
+if ($null -ne $env:NHVX) {
+ $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+ $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-bench.exe" `
+ --mmap 0 -m $basedir\..\..\gguf\$model `
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+ --batch-size 128 -ngl 99 --device $device $cli_opts
diff --git a/llama.cpp/scripts/snapdragon/windows/run-cli.ps1 b/llama.cpp/scripts/snapdragon/windows/run-cli.ps1
new file mode 100644
index 0000000..b13161a
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/windows/run-cli.ps1
@@ -0,0 +1,53 @@
+
+#!/usr/bin/env pwsh
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
+
+$cli_opts=$args
+
+$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
+if ($null -ne $env:M) {
+ $model=$env:M
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+ $device=$env:D
+}
+
+if ($null -ne $env:V) {
+ $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:E) {
+ $env:GGML_HEXAGON_EXPERIMENTAL=$env:E
+}
+
+if ($null -ne $env:SCHED) {
+ $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v"
+}
+
+if ($null -ne $env:PROF) {
+ $env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1
+}
+
+if ($null -ne $env:OPMASK) {
+ $env:GGML_HEXAGON_OPMASK=$env:OPMASK
+}
+
+if ($null -ne $env:NHVX) {
+ $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+ $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\llama-completion.exe" `
+ --no-mmap -no-cnv -m $basedir\..\..\gguf\$model `
+ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
+ --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on `
+ -ngl 99 --device $device $cli_opts
diff --git a/llama.cpp/scripts/snapdragon/windows/run-tool.ps1 b/llama.cpp/scripts/snapdragon/windows/run-tool.ps1
new file mode 100644
index 0000000..70094af
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/windows/run-tool.ps1
@@ -0,0 +1,56 @@
+
+#!/usr/bin/env pwsh
+
+# Basedir on device
+$basedir=".\pkg-snapdragon"
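+
+# Usage: .\run-tool.ps1 <tool.exe> [tool args...]
+# Optional env vars (V, E, SCHED, PROF, OPMASK, NHVX, NDEV) set the corresponding GGML_HEXAGON_* / GGML_SCHED debug variables.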
+
+if ($args.Count -eq 0) {
+    Write-Host "No arguments provided. Expected the tool and its arguments to run."
+ exit -1
+}
+
+$tool=$args[0]
+$cli_opts=@()
+
+if ($args.Count -gt 1) {
+ $cli_opts=$args[1..($args.Count - 1)]
+}
+
+$device="HTP0"
+if ($null -ne $env:D) {
+ $device=$env:D
+}
+
+if ($null -ne $env:V) {
+ $env:GGML_HEXAGON_VERBOSE=$env:V
+}
+
+if ($null -ne $env:E) {
+ $env:GGML_HEXAGON_EXPERIMENTAL=$env:E
+}
+
+if ($null -ne $env:SCHED) {
+ $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v"
+}
+
+if ($null -ne $env:PROF) {
+ $env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1
+}
+
+if ($null -ne $env:OPMASK) {
+ $env:GGML_HEXAGON_OPMASK=$env:OPMASK
+}
+
+if ($null -ne $env:NHVX) {
+ $env:GGML_HEXAGON_NHVX=$env:NHVX
+}
+
+if ($null -ne $env:NDEV) {
+ $env:GGML_HEXAGON_NDEV=$env:NDEV
+}
+
+$env:ADSP_LIBRARY_PATH="$basedir\lib"
+
+& "$basedir\bin\$tool" `
+ $cli_opts
diff --git a/llama.cpp/scripts/snapdragon/windows/setup-build.ps1 b/llama.cpp/scripts/snapdragon/windows/setup-build.ps1
new file mode 100644
index 0000000..0f3244c
--- /dev/null
+++ b/llama.cpp/scripts/snapdragon/windows/setup-build.ps1
@@ -0,0 +1,105 @@
+# Running as Administrator is NOT strictly necessary for setting User-scope env vars,
+# but it is recommended for creating directories under the C:\ root if permissions are restricted.
+
+$ErrorActionPreference = "Stop"
+
+# --- Configuration ---
+$BaseDir = "C:\Qualcomm"
+
+# SDK 1: Hexagon
+$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
+$HexagonParent = Join-Path $BaseDir "Hexagon_SDK"
+$HexagonSdkVersion = "6.4.0.2"
+$HexagonToolsVersion = "19.0.04"
+$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion
+$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"
+
+# SDK 2: OpenCL
+$OpenCLUrl = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz"
+$OpenCLParent = Join-Path $BaseDir "OpenCL_SDK"
+$OpenCLVersion = "2.3.2"
+$OpenCLTarget = Join-Path $OpenCLParent $OpenCLVersion
+
+# --- Helper Function ---
+function Install-QualcommSDK {
+ param (
+ [string]$Url,
+ [string]$ParentDir,
+ [string]$TargetDir,
+ [string]$Name
+ )
+
+ # 1. Create Parent Directory
+ if (-not (Test-Path -Path $ParentDir)) {
+ Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan
+ New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null
+ }
+
+ # 2. Check for Specific Version Directory
+ if (Test-Path -Path $TargetDir) {
+ Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green
+ }
+ else {
+        Write-Host "$Name not found. Preparing to download..." -ForegroundColor Yellow
+
+ # Create the target directory to extract into
+ New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null
+
+ # Define temporary archive path
+ $TempFile = Join-Path $ParentDir "temp_sdk.tar.xz"
+
+ try {
+ # Download
+ Write-Host "Downloading from: $Url"
+ Invoke-WebRequest -Uri $Url -OutFile $TempFile
+
+ # Untar
+ # Note: We assume Windows includes tar.exe (Win 10 build 17063+)
+ Write-Host "Extracting archive to $TargetDir..."
+
+            # Extract with -C into the parent of the target directory; the archive's top-level folder is expected to be the versioned directory created above
+ tar -xJvf $TempFile -C $TargetDir\..
+
+ Write-Host "Extraction complete." -ForegroundColor Green
+ }
+ catch {
+ Write-Error "Failed to download or extract $Name. Error: $_"
+ # Cleanup target dir if failed so script tries again next time
+ Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue
+ }
+ finally {
+ # Cleanup Archive
+ if (Test-Path $TempFile) { Remove-Item $TempFile -Force }
+ }
+ }
+}
+
+# --- Execution ---
+
+# 1. Ensure Base C:\Qualcomm exists
+if (-not (Test-Path $BaseDir)) {
+ New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null
+}
+
+# 2. Run Install Logic
+Install-QualcommSDK -Url $HexagonUrl -ParentDir $HexagonParent -TargetDir $HexagonSdkTarget -Name "Hexagon SDK"
+Install-QualcommSDK -Url $OpenCLUrl -ParentDir $OpenCLParent -TargetDir $OpenCLTarget -Name "OpenCL SDK"
+
+# --- Environment Variables ---
+
+Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan
+
+# Set OPENCL_SDK_ROOT
+[System.Environment]::SetEnvironmentVariable('OPENCL_SDK_ROOT', $OpenCLTarget, [System.EnvironmentVariableTarget]::User)
+$env:OPENCL_SDK_ROOT = $OpenCLTarget # Set for current session as well
+Write-Host "OPENCL_SDK_ROOT set to: $OpenCLTarget"
+
+# Set HEXAGON_SDK_ROOT
+[System.Environment]::SetEnvironmentVariable('HEXAGON_SDK_ROOT', $HexagonSdkTarget, [System.EnvironmentVariableTarget]::User)
+$env:HEXAGON_SDK_ROOT = $HexagonSdkTarget # Set for current session as well
+Write-Host "HEXAGON_SDK_ROOT set to: $HexagonSdkTarget"
+
+# Set HEXAGON_TOOLS_ROOT
+[System.Environment]::SetEnvironmentVariable('HEXAGON_TOOLS_ROOT', $HexagonToolsTarget, [System.EnvironmentVariableTarget]::User)
+$env:HEXAGON_TOOLS_ROOT = $HexagonToolsTarget # Set for current session as well
+Write-Host "HEXAGON_TOOLS_ROOT set to: $HexagonToolsTarget"
diff --git a/llama.cpp/scripts/sync-ggml-am.sh b/llama.cpp/scripts/sync-ggml-am.sh
new file mode 100755
index 0000000..826c560
--- /dev/null
+++ b/llama.cpp/scripts/sync-ggml-am.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+#
+# Synchronize ggml changes to llama.cpp
+#
+# Usage:
+#
+# $ cd /path/to/llama.cpp
+# $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2... -C 3
+#
+
+set -e
+
+sd=$(dirname $0)
+cd $sd/../
+
+SRC_LLAMA=$(pwd)
+SRC_GGML=$(cd ../ggml; pwd)
+
+if [ ! -d $SRC_GGML ]; then
+ echo "ggml not found at $SRC_GGML"
+ exit 1
+fi
+
+lc=$(cat $SRC_LLAMA/scripts/sync-ggml.last)
+echo "Syncing ggml changes since commit $lc"
+
+to_skip=""
+
+# context for git patches in number of lines
+ctx="8"
+
+while [ "$1" != "" ]; do
+ case $1 in
+ -skip )
+ shift
+ to_skip=$1
+ ;;
+ -C )
+ shift
+ ctx=$1
+ ;;
+ esac
+ shift
+done
+
+cd $SRC_GGML
+
+git log --oneline $lc..HEAD
+git log --oneline $lc..HEAD --reverse | grep -v "(llama/[0-9]*)" | cut -d' ' -f1 > $SRC_LLAMA/ggml-commits
+
+if [ ! -s $SRC_LLAMA/ggml-commits ]; then
+ rm -v $SRC_LLAMA/ggml-commits
+ echo "No new commits"
+ exit 0
+fi
+
+if [ -f $SRC_LLAMA/ggml-src.patch ]; then
+ rm -v $SRC_LLAMA/ggml-src.patch
+fi
+
+while read c; do
+ if [ -n "$to_skip" ]; then
+ if [[ $to_skip == *"$c"* ]]; then
+ echo "Skipping $c"
+ continue
+ fi
+ fi
+
+ git format-patch -U${ctx} -k $c~1..$c --stdout -- \
+ CMakeLists.txt \
+ src/CMakeLists.txt \
+ cmake/BuildTypes.cmake \
+ cmake/GitVars.cmake \
+ cmake/common.cmake \
+ cmake/ggml-config.cmake.in \
+ src/ggml-cpu/cmake/FindSIMD.cmake \
+ src/ggml* \
+ include/ggml*.h \
+ include/gguf*.h \
+ tests/test-opt.cpp \
+ tests/test-quantize-fns.cpp \
+ tests/test-quantize-perf.cpp \
+ tests/test-backend-ops.cpp \
+ LICENSE \
+ scripts/gen-authors.sh \
+ >> $SRC_LLAMA/ggml-src.patch
+done < $SRC_LLAMA/ggml-commits
+
+rm -v $SRC_LLAMA/ggml-commits
+
+# delete files if empty
+if [ ! -s $SRC_LLAMA/ggml-src.patch ]; then
+ rm -v $SRC_LLAMA/ggml-src.patch
+fi
+
+cd $SRC_LLAMA
+
+if [ -f $SRC_LLAMA/ggml-src.patch ]; then
+ # replace PR numbers
+ #
+ # Subject: some text (#1234)
+ # Subject: some text (ggml/1234)
+ cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
+ mv ggml-src.patch.tmp ggml-src.patch
+
+ cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
+ mv ggml-src.patch.tmp ggml-src.patch
+
+ # replace filenames:
+ #
+    # CMakeLists.txt -> ggml/CMakeLists.txt
+ # src/CMakeLists.txt -> ggml/src/CMakeLists.txt
+
+ # cmake/BuildTypes.cmake -> ggml/cmake/BuildTypes.cmake
+ # cmake/GitVars.cmake -> ggml/cmake/GitVars.cmake
+ # cmake/common.cmake -> ggml/cmake/common.cmake
+ # cmake/ggml-config.cmake.in -> ggml/cmake/ggml-config.cmake.in
+ # src/ggml-cpu/cmake/FindSIMD.cmake -> ggml/src/ggml-cpu/cmake/FindSIMD.cmake
+ #
+ # src/ggml* -> ggml/src/ggml*
+ #
+ # include/ggml*.h -> ggml/include/ggml*.h
+ # include/gguf*.h -> ggml/include/gguf*.h
+ #
+    # tests/test*.cpp -> tests/test*.cpp
+ #
+ # LICENSE -> LICENSE
+ # scripts/gen-authors.sh -> scripts/gen-authors.sh
+
+ cat ggml-src.patch | sed -E \
+ -e 's/([[:space:]]| [ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \
+ -e 's/([[:space:]]| [ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
+ -e 's/([[:space:]]| [ab]\/)cmake\/BuildTypes.cmake/\1ggml\/cmake\/BuildTypes.cmake/g' \
+ -e 's/([[:space:]]| [ab]\/)cmake\/GitVars.cmake/\1ggml\/cmake\/GitVars.cmake/g' \
+ -e 's/([[:space:]]| [ab]\/)cmake\/common.cmake/\1ggml\/cmake\/common.cmake/g' \
+ -e 's/([[:space:]]| [ab]\/)cmake\/ggml-config.cmake.in/\1ggml\/cmake\/ggml-config.cmake.in/g' \
+ -e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\/cmake\/FindSIMD.cmake/\1ggml\/src\/ggml-cpu\/cmake\/FindSIMD.cmake/g' \
+ -e 's/([[:space:]]| [ab]\/)src\/ggml(.*)/\1ggml\/src\/ggml\2/g' \
+ -e 's/([[:space:]]| [ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \
+ -e 's/([[:space:]]| [ab]\/)include\/gguf(.*)\.h/\1ggml\/include\/gguf\2.h/g' \
+ -e 's/([[:space:]]| [ab]\/)tests\/(.*)\.cpp/\1tests\/\2.cpp/g' \
+ -e 's/([[:space:]]| [ab]\/)LICENSE/\1LICENSE/g' \
+ -e 's/([[:space:]]| [ab]\/)scripts\/gen-authors\.sh/\1scripts\/gen-authors.sh/g' \
+ > ggml-src.patch.tmp
+ mv ggml-src.patch.tmp ggml-src.patch
+
+ git am -C${ctx} ggml-src.patch
+
+ rm -v $SRC_LLAMA/ggml-src.patch
+fi
+
+# update last commit
+cd $SRC_GGML
+git log -1 --format=%H > $SRC_LLAMA/scripts/sync-ggml.last
+
+echo "Done"
+
+exit 0
diff --git a/llama.cpp/scripts/sync-ggml.last b/llama.cpp/scripts/sync-ggml.last
new file mode 100644
index 0000000..81e79a9
--- /dev/null
+++ b/llama.cpp/scripts/sync-ggml.last
@@ -0,0 +1 @@
+a8db410a252c8c8f2d120c6f2e7133ebe032f35d
diff --git a/llama.cpp/scripts/sync-ggml.sh b/llama.cpp/scripts/sync-ggml.sh
new file mode 100755
index 0000000..2da9b57
--- /dev/null
+++ b/llama.cpp/scripts/sync-ggml.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+cp -rpv ../ggml/CMakeLists.txt ./ggml/CMakeLists.txt
+cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
+
+cp -rpv ../ggml/cmake/* ./ggml/cmake/
+cp -rpv ../ggml/src/ggml-cpu/cmake/* ./ggml/src/ggml-cpu/cmake/
+
+cp -rpv ../ggml/src/ggml* ./ggml/src/
+
+cp -rpv ../ggml/include/ggml*.h ./ggml/include/
+cp -rpv ../ggml/include/gguf*.h ./ggml/include/
+
+cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
+cp -rpv ../ggml/tests/test-quantize-fns.cpp ./tests/test-quantize-fns.cpp
+cp -rpv ../ggml/tests/test-quantize-perf.cpp ./tests/test-quantize-perf.cpp
+cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp
+
+cp -rpv ../LICENSE ./LICENSE
+cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh
diff --git a/llama.cpp/scripts/sync_vendor.py b/llama.cpp/scripts/sync_vendor.py
new file mode 100755
index 0000000..1ff6a9a
--- /dev/null
+++ b/llama.cpp/scripts/sync_vendor.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+import urllib.request
+
+vendor = {
+ "https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
+ "https://github.com/nlohmann/json/releases/latest/download/json_fwd.hpp": "vendor/nlohmann/json_fwd.hpp",
+
+ "https://raw.githubusercontent.com/nothings/stb/refs/heads/master/stb_image.h": "vendor/stb/stb_image.h",
+
+ # not using latest tag to avoid this issue: https://github.com/ggml-org/llama.cpp/pull/17179#discussion_r2515877926
+ # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
+ "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
+
+ "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h",
+ "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE",
+
+ "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
+}
+
+for url, filename in vendor.items():
+ print(f"downloading {url} to {filename}") # noqa: NP100
+ urllib.request.urlretrieve(url, filename)
+
+ # split cpp/h files for httplib
+ # see: https://github.com/yhirose/cpp-httplib/blob/master/split.py
+ if 'httplib.h' in filename:
+ border = '// ----------------------------------------------------------------------------'
+ with open(filename, 'r') as f:
+ content = f.read()
+ header, implementation, footer = content.split(border, 2)
+ fname_cpp = filename.replace('.h', '.cpp')
+ with open(filename, 'w') as fh:
+ fh.write(header)
+ fh.write(footer)
+ with open(fname_cpp, 'w') as fc:
+ fc.write('#include "httplib.h"\n')
+ fc.write('namespace httplib {\n')
+ fc.write(implementation.replace('\ninline ', '\n'))
+ fc.write('} // namespace httplib\n')
diff --git a/llama.cpp/scripts/tool_bench.py b/llama.cpp/scripts/tool_bench.py
new file mode 100755
index 0000000..d9f5583
--- /dev/null
+++ b/llama.cpp/scripts/tool_bench.py
@@ -0,0 +1,379 @@
+#!/usr/bin/env uv run
+'''
+ Simplistic tool call benchmarks for llama-server and ollama.
+
+    Essentially runs the tests in tools/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server, and ollama),
+    and plots the results of multiple runs (from the same .jsonl file or from several files) as a success-rate heatmap.
+
+ Simple usage example:
+
+ cmake -B build && cmake --build build --config Release -j -t llama-server
+
+ export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
+ export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
+
+ ./scripts/tool_bench.py run --n 10 --temp -1 --temp 0 --temp 1 --temp 2 --temp 5 --llama-baseline $PWD/buildMaster/bin/llama-server --output qwen14b.jsonl --hf bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L
+ ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 1.5B Q4_K_M" --output qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF --ollama qwen2.5:1.5b-instruct-q4_K_M
+ ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 Coder 7B Q4_K_M" --output qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF --ollama qwen2.5-coder:7b
+
+ ./scripts/tool_bench.py plot *.jsonl # Opens window w/ heatmap
+ ./scripts/tool_bench.py plot qwen*.jsonl --output qwen.png # Saves heatmap to qwen.png
+
+ (please see ./scripts/tool_bench.sh for a more complete example)
+'''
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+# "pytest",
+# "pandas",
+# "matplotlib",
+# "seaborn",
+# "requests",
+# "wget",
+# "typer",
+# ]
+# ///
+from contextlib import contextmanager
+from pathlib import Path
+import re
+from statistics import mean, median
+from typing import Annotated, Dict, List, Optional, Tuple
+import atexit
+import json
+import logging
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import subprocess
+import sys
+import time
+import typer
+
+sys.path.insert(0, Path(__file__).parent.parent.as_posix())
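+# The dummy "if True:" block below keeps these imports after the sys.path tweak (so import-sorting tools don't move them above it).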
+if True:
+ from tools.server.tests.utils import ServerProcess
+ from tools.server.tests.unit.test_tool_call import do_test_calc_result, do_test_hello_world, do_test_weather
+
+
+@contextmanager
+def scoped_server(sp: ServerProcess):
+ def stop():
+ nonlocal sp
+ if sp is not None:
+ sp.stop()
+ sp = None # type: ignore
+ atexit.register(stop)
+ yield sp
+ stop()
+
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+app = typer.Typer()
+
+
+@app.command()
+def plot(files: List[Path], output: Optional[Path] = None, test_regex: Optional[str] = None, server_regex: Optional[str] = None):
+
+ lines: List[Dict] = []
+ for file in files:
+ if not file.exists():
+ logger.error(f"File not found: {file}")
+ continue
+
+ try:
+ with file.open() as f:
+ raw_data = f.read()
+ logger.info(f"Reading {file} ({len(raw_data)} bytes)")
+
+ for line_num, line in enumerate(raw_data.split('\n'), 1):
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ record = json.loads(line)
+ lines.append(record)
+ except json.JSONDecodeError as e:
+ logger.warning(f"Invalid JSON at {file}:{line_num} - {e}")
+ except Exception as e:
+ logger.error(f"Error processing {file}: {e}")
+
+ if not lines:
+ raise Exception("No valid data was loaded")
+
+ data_dict: Dict[Tuple, float] = {}
+ models: List[str] = []
+ temps = set()
+ tests = set()
+ server_names = set()
+ total_counts = set()
+ for rec in lines:
+ try:
+ model = rec["model"]
+ temp = rec["temp"]
+ server_name = rec["server_name"]
+ test = rec["test"]
+ success = rec["success_ratio"]
+ success_count = rec["success_count"]
+ failure_count = rec["failure_count"]
+ total_count = success_count + failure_count
+ total_counts.add(total_count)
+
+ if test_regex and not re.search(test_regex, test):
+ continue
+
+ if server_regex and not re.search(server_regex, server_name):
+ continue
+
+ data_dict[(model, temp, server_name, test)] = success
+
+ if model not in models:
+ models.append(model)
+ temps.add(temp)
+ tests.add(test)
+ server_names.add(server_name)
+
+ except KeyError as e:
+ logger.warning(f"Missing required field in record: {e}")
+
+ if len(total_counts) > 1:
+ logger.warning(f"Total counts are not consistent: {total_counts}")
+
+ # Sort the collected values
+ temps = list(sorted(temps, key=lambda x: x if x is not None else -1))
+ tests = list(sorted(tests))
+ server_names = list(sorted(server_names))
+
+ logger.info(f"Processed {len(lines)} lines")
+ logger.info(f"Found {len(data_dict)} valid data points")
+ logger.info(f"Models: {models}")
+ logger.info(f"Temperatures: {temps}")
+ logger.info(f"Tests: {tests}")
+ logger.info(f"Servers: {server_names}")
+
+ matrix: list[list[float]] = []
+ index: list[str] = []
+
+ all_cols = [
+ (server_name, test)
+ for server_name in server_names
+ for test in tests
+ ]
+ for model in models:
+ for temp in temps:
+ index.append(f"{model} @ {temp}")
+ row_vals = [
+ data_dict.get((model, temp, server_name, test), np.nan)
+ for server_name, test in all_cols
+ ]
+ matrix.append(row_vals)
+
+ columns: list[str] = [f"{server_name}\n{test}" for server_name, test in all_cols]
+
+ df = pd.DataFrame(matrix, index=np.array(index), columns=np.array(columns))
+
+ plt.figure(figsize=(12, 6))
+
+ sns.heatmap(
+ df, annot=True, cmap="RdYlGn", vmin=0.0, vmax=1.0, cbar=True, fmt=".2f", center=0.5, square=True, linewidths=0.5,
+ cbar_kws={"label": "Success Ratio"},
+ )
+
+ plt.title(f"Tool Call Bench (n = {str(min(total_counts)) if len(total_counts) == 1 else f'{min(total_counts)}-{max(total_counts)}'})\nSuccess Ratios by Server & Test", pad=20)
+ plt.xlabel("Server & Test", labelpad=10)
+ plt.ylabel("Model @ Temperature", labelpad=10)
+
+ plt.xticks(rotation=45, ha='right')
+ plt.yticks(rotation=0)
+
+ plt.tight_layout()
+
+ if output:
+ plt.savefig(output, dpi=300, bbox_inches='tight')
+ logger.info(f"Plot saved to {output}")
+ else:
+ plt.show()
+
+
+@app.command()
+def run(
+ output: Annotated[Path, typer.Option(help="Output JSON file")],
+ model: Annotated[Optional[str], typer.Option(help="Name of the model to test (server agnostic)")] = None,
+ hf: Annotated[Optional[str], typer.Option(help="GGUF huggingface model repo id (+ optional quant) to test w/ llama-server")] = None,
+ chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None,
+ chat_template_file: Annotated[Optional[str], typer.Option(help="Chat template file override for llama-server")] = None,
+ ollama: Annotated[Optional[str], typer.Option(help="Ollama model tag to test")] = None,
+ llama_baseline: Annotated[Optional[str], typer.Option(help="llama-server baseline binary path to use as baseline")] = None,
+ n: Annotated[int, typer.Option(help="Number of times to run each test")] = 10,
+ temp: Annotated[Optional[List[float]], typer.Option(help="Set of temperatures to test")] = None,
+ top_p: Annotated[Optional[float], typer.Option(help="top_p")] = None,
+ top_k: Annotated[Optional[int], typer.Option(help="top_k")] = None,
+ ctk: Annotated[Optional[str], typer.Option(help="ctk")] = None,
+ ctv: Annotated[Optional[str], typer.Option(help="ctv")] = None,
+ fa: Annotated[Optional[bool], typer.Option(help="fa")] = None,
+ seed: Annotated[Optional[int], typer.Option(help="Random seed")] = None,
+ port: Annotated[int, typer.Option(help="llama-server port")] = 8084,
+ force: Annotated[bool, typer.Option(help="Force overwrite of output file")] = False,
+ append: Annotated[bool, typer.Option(help="Append to output file")] = False,
+
+ test_hello_world: Annotated[bool, typer.Option(help="Whether to run the hello world test")] = True,
+ test_weather: Annotated[bool, typer.Option(help="Whether to run the weather test")] = True,
+ test_calc_result: Annotated[bool, typer.Option(help="Whether to run the calc result test")] = False,
+):
+    # TODO: check that only one of --force and --append is given
+
+ n_predict = 512 # High because of DeepSeek R1
+ # n_ctx = 8192
+ n_ctx = 2048
+
+ if model is None:
+ if hf is not None:
+ model = hf.split("/")[-1]
+ elif ollama is not None:
+ model = ollama
+
+ assert force or append or not output.exists(), f"Output file already exists: {output}; use --force to overwrite"
+
+ with output.open('a' if append else 'w') as output_file:
+
+ def run(server: ServerProcess, *, server_name: str, model_id: str, temp: Optional[float] = None, output_kwargs={}, request_kwargs={}):
+ request_kwargs = {**request_kwargs}
+ if temp is not None:
+ request_kwargs['temperature'] = temp
+ if top_p is not None:
+ request_kwargs['top_p'] = top_p
+ if top_k is not None:
+ request_kwargs['top_k'] = top_k
+ if seed is not None:
+ request_kwargs['seed'] = seed
+
+ request_kwargs['cache_prompt'] = False
+
+ tests = {}
+ if test_hello_world:
+ tests["hello world"] = lambda server: do_test_hello_world(server, **request_kwargs)
+ if test_weather:
+ tests["weather"] = lambda server: do_test_weather(server, **request_kwargs)
+ if test_calc_result:
+ tests["calc result"] = lambda server: do_test_calc_result(server, None, 512, **request_kwargs)
+
+ for test_name, test in tests.items():
+ success_count = 0
+ failure_count = 0
+ failures = []
+ success_times = []
+ failure_times = []
+ logger.info(f"Running {test_name} ({server_name}, {model}): ")
+ for i in range(n):
+ start_time = time.time()
+
+ def elapsed():
+ return time.time() - start_time
+
+ try:
+ test(server)
+ success_times.append(elapsed())
+ success_count += 1
+ logger.info('success')
+ except Exception as e:
+ logger.error(f'failure: {e}')
+ failure_count += 1
+ failure_times.append(elapsed())
+ failures.append(str(e))
+ # import traceback
+ # traceback.print_exc()
+ output_file.write(json.dumps({**output_kwargs, **dict(
+ model=model,
+ server_name=server_name,
+ model_id=model_id,
+ test=test_name,
+ temp=t,
+ top_p=top_p,
+ top_k=top_k,
+ ctk=ctk,
+ ctv=ctv,
+ seed=seed,
+ success_ratio=float(success_count) / n,
+ avg_time=mean(success_times + failure_times),
+ median_time=median(success_times + failure_times),
+ success_count=success_count,
+ success_times=success_times,
+ failure_count=failure_count,
+ failure_times=failure_times,
+ failures=list(set(failures)),
+ )}) + '\n')
+ output_file.flush()
+
+ for t in [None] if temp is None else [t if t >= 0 else None for t in temp]:
+ if hf is not None:
+
+ servers: list[Tuple[str, Optional[str]]] = [('llama-server', None)]
+ if llama_baseline is not None:
+ servers.append(('llama-server (baseline)', llama_baseline))
+
+ for server_name, server_path in servers:
+ server = ServerProcess()
+ server.n_ctx = n_ctx
+ server.n_slots = 1
+ server.jinja = True
+ server.ctk = ctk
+ server.ctv = ctv
+ server.fa = "on" if fa else "off"
+ server.n_predict = n_predict
+ server.model_hf_repo = hf
+ server.model_hf_file = None
+ server.chat_template = chat_template
+ server.chat_template_file = chat_template_file
+ server.server_path = server_path
+ if port is not None:
+ server.server_port = port
+ # server.debug = True
+
+ with scoped_server(server):
+ server.start(timeout_seconds=15 * 60)
+ for ignore_chat_grammar in [False]:
+ run(
+ server,
+ server_name=server_name,
+ model_id=hf,
+ temp=t,
+ output_kwargs=dict(
+ chat_template=chat_template,
+ chat_template_file=chat_template_file,
+ ),
+ request_kwargs=dict(
+ ignore_chat_grammar=ignore_chat_grammar,
+ ),
+ )
+
+ if ollama is not None:
+ server = ServerProcess()
+ server.server_port = 11434
+ server.server_host = "localhost"
+ subprocess.check_call(["ollama", "pull", ollama])
+
+ with scoped_server(server):
+ run(
+ server,
+ server_name="ollama",
+ model_id=ollama,
+ temp=t,
+ output_kwargs=dict(
+ chat_template=None,
+ chat_template_file=None,
+ ),
+ request_kwargs=dict(
+ model=ollama,
+ max_tokens=n_predict,
+ num_ctx = n_ctx,
+ ),
+ )
+
+
+if __name__ == "__main__":
+ app()
diff --git a/llama.cpp/scripts/tool_bench.sh b/llama.cpp/scripts/tool_bench.sh
new file mode 100755
index 0000000..05b41d2
--- /dev/null
+++ b/llama.cpp/scripts/tool_bench.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+cmake --build build -j
+
+export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
+export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
+
+if [ ! -x "$LLAMA_SERVER_BIN_PATH" ]; then
+ echo "Could not find llama-server binary at $LLAMA_SERVER_BIN_PATH"
+ exit 1
+fi
+if [ ! -d "$LLAMA_CACHE" ]; then
+ echo "Could not find llama cache at $LLAMA_CACHE, please set LLAMA_CACHE explicitly."
+ exit 1
+fi
+
+export ARGS=(
+ --llama-baseline="$(which llama-server)"
+ --n 30
+ --temp -1 # Leaves temperature parameter unset (use the server's default, e.g. 0.6 for ollama)
+ --temp 0
+ --temp 0.5
+ --temp 0.75
+ --temp 1
+ --temp 1.5
+ --temp 2
+ --temp 5
+ "$@"
+)
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 0.5B Q4_K_M" --output ../qwenc0.5b.jsonl --hf bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:0.5b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 1.5B Q4_K_M" --output ../qwenc1.5b.jsonl --hf bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:1.5b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 3B Q4_K_M" --output ../qwenc3b.jsonl --hf bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:3b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 7B Q4_K_M" --output ../qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:7b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 32B Q4_K_M" --output ../qwenc32b.jsonl --hf bartowski/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:32B-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 1.5B Q4_K_M" --output ../qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:1.5b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 3B Q4_K_M" --output ../qwen3b.jsonl --hf bartowski/Qwen2.5-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:3b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 7B Q4_K_M" --output ../qwen7b.jsonl --hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:7b-instruct-q4_K_M
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 1B Q4_K_M" --output ../llama1b.jsonl --hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M --ollama llama3.2:1b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 3B Q4_K_M" --output ../llama3b.jsonl --hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M --ollama llama3.2:3b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.1 Instruct 8B Q4_K_M" --output ../llama8b.jsonl --hf bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M --ollama llama3.1:8b-instruct-q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.3 70B Q4_K_M" --output ../llama70b.jsonl --hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Mistral Nemo Q4_K_M" --output ../nemo.jsonl --hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M --ollama mistral-nemo:12b-instruct-2407-q4_K_M
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 3 Llama 3.1 8B Q4_K_M" --output ../hermes3.jsonl --hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M --ollama hermes3:8b-llama3.1-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
+./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 2 Pro Llama 3 8B Q4_K_M" --output ../hermes2.jsonl --hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M --ollama hermes2:8b-llama3-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use )
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Functionary Small V3.2 Q4_K_M" --output ../funct3.2.jsonl --hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
+./scripts/tool_bench.py run ${ARGS[@]} --model "FireFunction V2 IQ1_M" --output ../firef2.jsonl --hf bartowski/firefunction-v2-GGUF:IQ1_M --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use )
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Command R7B 12-2024 Q6_K_L" --output ../c4ai.jsonl --hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use )
+
+./scripts/tool_bench.py run ${ARGS[@]} --model "Gemma 2 2B Q8_0" --output ../gemma2.jsonl --hf bartowski/gemma-2-2b-it-GGUF:Q8_0
+./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 4 Instruct Q4_K_M" --output ../phi4.jsonl --hf bartowski/phi-4-GGUF:Q4_K_M # --ollama phi4
+./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 3.5 Mini Instruct Q4_K_M" --output ../phi3.5.jsonl --hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M # --ollama phi3.5:3.8b-mini-instruct-q4_K_M
+
+# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 7B Q6_K_L" --output ../dsqw7.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-7B tool_use )
+# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 32B Q4_K_M" --output ../dsqw32.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-32B tool_use )
+
+
+for f in ../*.jsonl; do
+ ./scripts/tool_bench.py plot "$f" --output ${f%.jsonl}.png || true
+done
diff --git a/llama.cpp/scripts/verify-checksum-models.py b/llama.cpp/scripts/verify-checksum-models.py
new file mode 100755
index 0000000..0b5b9aa
--- /dev/null
+++ b/llama.cpp/scripts/verify-checksum-models.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+import logging
+import os
+import hashlib
+
+logger = logging.getLogger("verify-checksum-models")
+
+
+def sha256sum(file):
+ block_size = 16 * 1024 * 1024 # 16 MB block size
+ b = bytearray(block_size)
+ file_hash = hashlib.sha256()
+ mv = memoryview(b)
+ with open(file, 'rb', buffering=0) as f:
+ while True:
+ n = f.readinto(mv)
+ if not n:
+ break
+ file_hash.update(mv[:n])
+
+ return file_hash.hexdigest()
+
+
+# Define the path to the llama directory (parent folder of script directory)
+llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+
+# Define the file with the list of hashes and filenames
+hash_list_file = os.path.join(llama_path, "SHA256SUMS")
+
+# Check if the hash list file exists
+if not os.path.exists(hash_list_file):
+ logger.error(f"Hash list file not found: {hash_list_file}")
+ exit(1)
+
+# Read the hash file content and split it into an array of lines
+with open(hash_list_file, "r") as f:
+ hash_list = f.read().splitlines()
+
+# Create an array to store the results
+results = []
+
+# Loop over each line in the hash list
+for line in hash_list:
+ # Split the line into hash and filename
+ hash_value, filename = line.split(" ")
+
+ # Get the full path of the file by joining the llama path and the filename
+ file_path = os.path.join(llama_path, filename)
+
+ # Informing user of the progress of the integrity check
+ logger.info(f"Verifying the checksum of {file_path}")
+
+ # Check if the file exists
+ if os.path.exists(file_path):
+ # Calculate the SHA256 checksum of the file using hashlib
+ file_hash = sha256sum(file_path)
+
+ # Compare the file hash with the expected hash
+ if file_hash == hash_value:
+ valid_checksum = "V"
+ file_missing = ""
+ else:
+ valid_checksum = ""
+ file_missing = ""
+ else:
+ valid_checksum = ""
+ file_missing = "X"
+
+ # Add the results to the array
+ results.append({
+ "filename": filename,
+ "valid checksum": valid_checksum,
+ "file missing": file_missing
+ })
+
+
+# Print column headers for results table
+print("filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) # noqa: NP100
+print("-" * 80) # noqa: NP100
+
+# Output the results as a table
+for r in results:
+ print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") # noqa: NP100
diff --git a/llama.cpp/scripts/xxd.cmake b/llama.cpp/scripts/xxd.cmake
new file mode 100644
index 0000000..14d2753
--- /dev/null
+++ b/llama.cpp/scripts/xxd.cmake
@@ -0,0 +1,16 @@
+# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
+# Usage: cmake -DINPUT=tools/server/public/index.html -DOUTPUT=tools/server/index.html.hpp -P scripts/xxd.cmake
+
+SET(INPUT "" CACHE STRING "Input File")
+SET(OUTPUT "" CACHE STRING "Output File")
+
+get_filename_component(filename "${INPUT}" NAME)
+string(REGEX REPLACE "\\.|-" "_" name "${filename}")
+
+file(READ "${INPUT}" hex_data HEX)
+string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}")
+
+string(LENGTH ${hex_data} hex_len)
+math(EXPR len "${hex_len} / 2")
+
+file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")