Diffstat (limited to 'llama.cpp/scripts'): 51 files changed, 8422 insertions, 0 deletions
diff --git a/llama.cpp/scripts/apple/validate-apps.sh b/llama.cpp/scripts/apple/validate-apps.sh new file mode 100755 index 0000000..f047575 --- /dev/null +++ b/llama.cpp/scripts/apple/validate-apps.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +./scripts/apple/validate-ios.sh +./scripts/apple/validate-macos.sh +./scripts/apple/validate-visionos.sh +./scripts/apple/validate-tvos.sh diff --git a/llama.cpp/scripts/apple/validate-ios.sh b/llama.cpp/scripts/apple/validate-ios.sh new file mode 100755 index 0000000..50800d8 --- /dev/null +++ b/llama.cpp/scripts/apple/validate-ios.sh @@ -0,0 +1,820 @@ +#!/usr/bin/env bash +# validate-ios.sh - Validate iOS Application with embedded llama.xcframework using SwiftUI + +# Authentication options (optional) (can be set via environment variables) +# To use: export APPLE_ID=your.email@example.com +# export APPLE_PASSWORD=your-app-specific-password +# ./validate-ios.sh +APPLE_ID=${APPLE_ID:-""} +APPLE_PASSWORD=${APPLE_PASSWORD:-""} + +# Ensure the script exits on error +set -e + +# Function to print usage instructions +print_usage() { + echo "Usage: ./validate-ios.sh [OPTIONS]" + echo "" + echo "Options:" + echo " --help Show this help message" + echo " --apple-id EMAIL Apple ID email for validation" + echo " --apple-password PWD App-specific password for Apple ID" + echo "" + echo "Environment variables:" + echo " APPLE_ID Apple ID email for validation" + echo " APPLE_PASSWORD App-specific password for Apple ID" + echo "" + echo "Notes:" + echo " - Command line options take precedence over environment variables" + echo " - Authentication is optional. If not provided, alternative validation will be performed" + echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage" +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --help) + print_usage + exit 0 + ;; + --apple-id) + APPLE_ID="$2" + shift 2 + ;; + --apple-password) + APPLE_PASSWORD="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + print_usage + exit 1 + ;; + esac +done + +# Function to clean up in case of error +cleanup() { + # Don't clean up temp files on error to help with debugging + echo "===== iOS Validation Process Failed =====" + exit 1 +} + +# Set up trap to call cleanup function on error +trap cleanup ERR + +set -e # Exit on any error + +ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )" +BUILD_DIR="${ROOT_DIR}/validation-builds/ios" + +# Configuration +APP_NAME="iOSLlamaTest" +BUNDLE_ID="org.ggml.iOSLlamaTest" +XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework" +TEMP_DIR="${BUILD_DIR}/temp" +ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive" +IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa" +VALIDATION_DIR="${BUILD_DIR}/validation" + +# Create necessary directories +mkdir -p "${BUILD_DIR}" +mkdir -p "${TEMP_DIR}" +mkdir -p "${VALIDATION_DIR}" + +echo "===== iOS Validation Process Started =====" + +# 1. Create a simple test app project +echo "Creating test iOS app project..." 
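+# The SwiftUI test project below is generated on the fly rather than checked
+# in: it only needs to import the llama module and call
+# llama_context_default_params() to prove that the xcframework links and
+# embeds correctly. Illustrative invocation with credentials (values are
+# placeholders, not real):
+#   ./scripts/apple/validate-ios.sh \
+#       --apple-id your.email@example.com \
+#       --apple-password abcd-efgh-ijkl-mnop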
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + <string>${APP_NAME}</string> + <key>CFBundleIdentifier</key> + <string>${BUNDLE_ID}</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>${APP_NAME}</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> + <key>LSRequiresIPhoneOS</key> + <true/> + <key>UILaunchScreen</key> + <dict/> + <key>UIRequiredDeviceCapabilities</key> + <array> + <string>armv7</string> + </array> + <key>UISupportedInterfaceOrientations</key> + <array> + <string>UIInterfaceOrientationPortrait</string> + </array> +</dict> +</plist> +EOF + +# Create SwiftUI app files +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources" + +# Create App.swift +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF +import SwiftUI +import llama + +@main +struct LlamaTestApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} +EOF + +# Create ContentView.swift +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF +import SwiftUI +import llama + +struct ContentView: View { + // Test that we can initialize a llama context params struct + let params = llama_context_default_params() + + var body: some View { + VStack(spacing: 20) { + Text("Llama Framework Test") + .font(.largeTitle) + .padding() + + Text("llama_context_default_params() created successfully") + .font(.headline) + .multilineTextAlignment(.center) + .padding() + + // Display some param values to confirm the framework is working + Text("n_ctx: \(params.n_ctx)") + .font(.body) + + Text("n_batch: \(params.n_batch)") + .font(.body) + + Spacer() + } + .padding() + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +EOF + +# Create project.pbxproj, fixing the framework search paths issues +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 54; + objects = { + +/* Begin PBXBuildFile section */ + 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; }; + 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; }; + 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 88888888888888888888888 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; }; + 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; + 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; }; +/* End PBXFileReference section */ +EOF + +# Add the rest of the project file with fixed framework search paths +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXFrameworksBuildPhase section */ + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 55555555555555555555555 /* llama.xcframework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = { + isa = PBXGroup; + children = ( + 99999999999999999999999 /* ${APP_NAME}.app */, + ); + name = Products; + sourceTree = "<group>"; + }; +EOF + +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = { + isa = PBXGroup; + children = ( + 66666666666666666666666 /* llama.xcframework */, + ); + name = Frameworks; + sourceTree = "<group>"; + }; + EEEEEEEEEEEEEEEEEEEEEEEE = { + isa = PBXGroup; + children = ( + FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */, + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */, + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */, + ); + sourceTree = "<group>"; + }; + FFFFFFFFFFFFFFFFFFFFFFFF /* 
iOSLlamaTest */ = { + isa = PBXGroup; + children = ( + 1111111111111111111111AA /* Sources */, + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */, + ); + path = "iOSLlamaTest"; + sourceTree = "<group>"; + }; + 1111111111111111111111AA /* Sources */ = { + isa = PBXGroup; + children = ( + 22222222222222222222222 /* App.swift */, + 44444444444444444444444 /* ContentView.swift */, + ); + path = Sources; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin PBXNativeTarget section */ + 3333333333333333333333AA /* ${APP_NAME} */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */; + buildPhases = ( + 5555555555555555555555AA /* Sources */, + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */, + 6666666666666666666666AA /* Resources */, + 88888888888888888888888 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "${APP_NAME}"; + productName = "${APP_NAME}"; + productReference = 99999999999999999999999 /* ${APP_NAME}.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 7777777777777777777777AA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1240; + LastUpgradeCheck = 1240; + TargetAttributes = { + 3333333333333333333333AA = { + CreatedOnToolsVersion = 12.4; + }; + }; + }; + buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */; + compatibilityVersion = "Xcode 12.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE; + productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 3333333333333333333333AA /* ${APP_NAME} */, + ); + }; +/* End PBXProject section */ +EOF + +# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXResourcesBuildPhase section */ + 6666666666666666666666AA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 5555555555555555555555AA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 33333333333333333333333 /* ContentView.swift in Sources */, + 11111111111111111111111 /* App.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 9999999999999999999999AA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = 
YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 16.4; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 16.4; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = { + isa = 
XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = "iOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + INFOPLIST_FILE = "iOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ +EOF + +# Finish the project.pbxproj file +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin XCConfigurationList section */ + 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9999999999999999999999AA /* Debug */, + AAAAAAAAAAAAAAAAAAAAABBB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */, + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 7777777777777777777777AA /* Project object */; +} +EOF + +# 2. Copy XCFramework to test project +echo "Copying XCFramework to test project..." +cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/" + +# 3. Build and archive the app +echo "Building and archiving test app..." 
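+# The archive is built without a signing identity (CODE_SIGN_IDENTITY="-",
+# CODE_SIGNING_ALLOWED=NO) so that validation can run on machines with no
+# provisioning profile; a shared xcscheme is generated first to avoid
+# xcodebuild's scheme-discovery issues with a hand-written project.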
+cd "${TEMP_DIR}/${APP_NAME}" + +# Create a simple xcscheme file to avoid xcodebuild scheme issues +mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes" +cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "1240" + version = "1.3"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + <BuildActionEntries> + <BuildActionEntry + buildForTesting = "YES" + buildForRunning = "YES" + buildForProfiling = "YES" + buildForArchiving = "YES" + buildForAnalyzing = "YES"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildActionEntry> + </BuildActionEntries> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES"> + <Testables> + </Testables> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "NO" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </LaunchAction> + <ProfileAction + buildConfiguration = "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> +EOF + +# Now use xcodebuild with an explicitly defined product name +xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk iphoneos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet + +# 4. Create IPA from archive +echo "Creating IPA from archive..." 
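+# An .ipa is simply a zip archive whose top level is a Payload/ directory
+# holding the .app bundle; the steps below assemble that layout by hand,
+# avoiding `xcodebuild -exportArchive`, which would expect signing options.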
+mkdir -p "${TEMP_DIR}/Payload" +cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/" + +# Check and log app structure before zipping +echo "App structure:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/" +echo "Frameworks:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" + +cd "${TEMP_DIR}" +zip -r "${IPA_PATH}" Payload + +# Check embedded provisioning profile +echo "Checking provisioning profile (if any)..." +PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null) +if [ -n "$PROVISIONING_PROFILE" ]; then + echo "Found embedded provisioning profile:" + security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile" +else + echo "No embedded provisioning profile found (expected for ad-hoc builds)" +fi + +# 5. Validate the IPA +echo "Validating IPA..." +VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt" + +# Check if authentication credentials are provided +AUTH_ARGS="" +if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then + echo "Using Apple ID authentication for validation..." + AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\"" +else + echo "No authentication credentials provided. Will perform basic validation." + echo "To use your personal developer account, you can run the script with:" + echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-ios.sh" + echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage" +fi + +# Run validation with detailed output +echo "Running validation with altool..." +if [ -n "$AUTH_ARGS" ]; then + # Use eval to properly handle the quoted arguments + eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type ios --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}" +else + xcrun altool --validate-app -f "${IPA_PATH}" --type ios --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}" +fi +VALIDATION_RESULT=$? + +# Final validation result +FINAL_VALIDATION_RESULT=0 + +# Check if validation failed because the app isn't in App Store Connect +if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then + echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect" + echo "This is expected for apps that haven't been registered in App Store Connect yet." + echo "This doesn't indicate a problem with the build or framework." + + # Perform alternative validation + echo "Performing alternative validation checks..." 
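+    # The structural checks below can also be reproduced by hand against the
+    # build outputs (illustrative commands, not run by this script):
+    #   unzip -l "${IPA_PATH}" | grep llama.framework
+    #   lipo -info Payload/${APP_NAME}.app/Frameworks/llama.framework/llama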
+ + # Check if IPA was created successfully + if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then + echo "✅ IPA file created successfully" + else + echo "❌ IPA file not created or empty" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if app binary exists and is executable + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then + echo "✅ App binary exists and is executable" + else + echo "❌ App binary missing or not executable" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework was properly embedded + if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then + echo "✅ llama.framework properly embedded" + else + echo "❌ llama.framework not properly embedded" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework binary exists + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then + echo "✅ Framework binary exists" + + # Further validate framework by checking architecture + ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|armv7\\|x86_64" | tr '\n' ' ') + if [ -n "$ARCHS" ]; then + echo "✅ Framework architecture(s): $ARCHS" + else + echo "⚠️ Could not determine framework architecture" + fi + else + echo "❌ Framework binary missing" + FINAL_VALIDATION_RESULT=1 + fi + + if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "✅ Alternative validation PASSED: App built successfully with embedded framework" + else + echo "❌ Alternative validation FAILED: Issues found with the app or framework" + fi +elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then + echo "✅ iOS Validation PASSED: IPA successfully validated" + echo "Results saved to ${VALIDATION_OUTPUT}" +else + echo "❌ iOS Validation FAILED: IPA validation found issues" + echo "See validation output at ${VALIDATION_OUTPUT}" + echo "" + echo "==== VALIDATION ERRORS ====" + + # Try to extract specific errors from the output + if grep -q "Error" "${VALIDATION_OUTPUT}"; then + grep -A 5 "Error" "${VALIDATION_OUTPUT}" + else + # If no specific error found, show the whole log + cat "${VALIDATION_OUTPUT}" + fi + + # Additional debugging: check IPA contents + echo "" + echo "==== IPA CONTENTS ====" + mkdir -p "${TEMP_DIR}/ipa_contents" + unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/" + + # Check for code signing issues + echo "" + echo "==== CODE SIGNING INFO ====" + codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed" + + # Check embedded frameworks + echo "" + echo "==== FRAMEWORK INFO ====" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" +fi + +# Don't clean up on error to allow inspection +if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then + echo "" + echo "Temporary files kept for inspection at: ${TEMP_DIR}" + echo "===== iOS Validation Process Failed =====" + exit 1 +fi + +# Clean up temporary files but keep build artifacts +if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "Cleaning up temporary files..." 
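+    # The rm below is intentionally left commented out so the generated
+    # project and build products stay available for inspection.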
+ #rm -rf "${TEMP_DIR}" +fi + +echo "===== iOS Validation Process Completed =====" +exit $FINAL_VALIDATION_RESULT diff --git a/llama.cpp/scripts/apple/validate-macos.sh b/llama.cpp/scripts/apple/validate-macos.sh new file mode 100755 index 0000000..fa800ee --- /dev/null +++ b/llama.cpp/scripts/apple/validate-macos.sh @@ -0,0 +1,781 @@ +#!/usr/bin/env bash +# validate-macos.sh - Validate macOS Application with embedded llama.xcframework using SwiftUI + +# Authentication options (optional) (can be set via environment variables) +# To use: export APPLE_ID=your.email@example.com +# export APPLE_PASSWORD=your-app-specific-password +# ./validate-macos.sh +APPLE_ID=${APPLE_ID:-""} +APPLE_PASSWORD=${APPLE_PASSWORD:-""} + +# Ensure the script exits on error +set -e + +# Function to print usage instructions +print_usage() { + echo "Usage: ./validate-macos.sh [OPTIONS]" + echo "" + echo "Options:" + echo " --help Show this help message" + echo " --apple-id EMAIL Apple ID email for validation" + echo " --apple-password PWD App-specific password for Apple ID" + echo "" + echo "Environment variables:" + echo " APPLE_ID Apple ID email for validation" + echo " APPLE_PASSWORD App-specific password for Apple ID" + echo "" + echo "Notes:" + echo " - Command line options take precedence over environment variables" + echo " - Authentication is optional. If not provided, alternative validation will be performed" + echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage" +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --help) + print_usage + exit 0 + ;; + --apple-id) + APPLE_ID="$2" + shift 2 + ;; + --apple-password) + APPLE_PASSWORD="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + print_usage + exit 1 + ;; + esac +done + +# Function to clean up in case of error +cleanup() { + # Don't clean up temp files on error to help with debugging + echo "===== macOS Validation Process Failed =====" + exit 1 +} + +# Set up trap to call cleanup function on error +trap cleanup ERR + +set -e # Exit on any error + +ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )" +BUILD_DIR="${ROOT_DIR}/validation-builds/ios" + +# Configuration +APP_NAME="MacOSLlamaTest" +BUNDLE_ID="org.ggml.MacOSLlamaTest" +XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework" +TEMP_DIR="${BUILD_DIR}/temp" +ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive" +APP_PATH="${BUILD_DIR}/${APP_NAME}.app" +ZIP_PATH="${BUILD_DIR}/${APP_NAME}.zip" +VALIDATION_DIR="${BUILD_DIR}/validation" + +# Create necessary directories +mkdir -p "${BUILD_DIR}" +mkdir -p "${TEMP_DIR}" +mkdir -p "${VALIDATION_DIR}" + +echo "===== macOS Validation Process Started =====" + +# 1. Create a simple test app project +echo "Creating test macOS app project..." 
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + <string>${APP_NAME}</string> + <key>CFBundleIdentifier</key> + <string>${BUNDLE_ID}</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>${APP_NAME}</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> + <key>LSMinimumSystemVersion</key> + <string>12.0</string> + <key>NSHumanReadableCopyright</key> + <string>Copyright © 2025 GGML. All rights reserved.</string> + <key>NSPrincipalClass</key> + <string>NSApplication</string> +</dict> +</plist> +EOF + +# Create SwiftUI app files +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources" + +# Create App.swift +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF +import SwiftUI +import llama + +@main +struct LlamaTestApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} +EOF + +# Create ContentView.swift with macOS specific elements +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF +import SwiftUI +import llama + +struct ContentView: View { + // Test that we can initialize a llama context params struct + let params = llama_context_default_params() + + var body: some View { + VStack(spacing: 20) { + Text("Llama Framework Test on macOS") + .font(.largeTitle) + .padding() + + Text("llama_context_default_params() created successfully") + .font(.headline) + .multilineTextAlignment(.center) + .padding() + + // Display some param values to confirm the framework is working + Text("n_ctx: \(params.n_ctx)") + .font(.body) + + Text("n_batch: \(params.n_batch)") + .font(.body) + + Spacer() + } + .padding() + .frame(width: 600, height: 400) + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +EOF + +# Create project.pbxproj, fixing the framework search paths issues +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 54; + objects = { + +/* Begin PBXBuildFile section */ + 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; }; + 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; }; + 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 88888888888888888888888 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; }; + 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; + 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; }; +/* End PBXFileReference section */ +EOF + +# Add the rest of the project file with fixed framework search paths +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXFrameworksBuildPhase section */ + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 55555555555555555555555 /* llama.xcframework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = { + isa = PBXGroup; + children = ( + 99999999999999999999999 /* ${APP_NAME}.app */, + ); + name = Products; + sourceTree = "<group>"; + }; +EOF + +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = { + isa = PBXGroup; + children = ( + 66666666666666666666666 /* llama.xcframework */, + ); + name = Frameworks; + sourceTree = "<group>"; + }; + EEEEEEEEEEEEEEEEEEEEEEEE = { + isa = PBXGroup; + children = ( + FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */, + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */, + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */, + ); + sourceTree = "<group>"; + }; + FFFFFFFFFFFFFFFFFFFFFFFF /* 
MacOSLlamaTest */ = { + isa = PBXGroup; + children = ( + 1111111111111111111111AA /* Sources */, + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */, + ); + path = "MacOSLlamaTest"; + sourceTree = "<group>"; + }; + 1111111111111111111111AA /* Sources */ = { + isa = PBXGroup; + children = ( + 22222222222222222222222 /* App.swift */, + 44444444444444444444444 /* ContentView.swift */, + ); + path = Sources; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin PBXNativeTarget section */ + 3333333333333333333333AA /* ${APP_NAME} */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */; + buildPhases = ( + 5555555555555555555555AA /* Sources */, + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */, + 6666666666666666666666AA /* Resources */, + 88888888888888888888888 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "${APP_NAME}"; + productName = "${APP_NAME}"; + productReference = 99999999999999999999999 /* ${APP_NAME}.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 7777777777777777777777AA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1240; + LastUpgradeCheck = 1240; + TargetAttributes = { + 3333333333333333333333AA = { + CreatedOnToolsVersion = 12.4; + }; + }; + }; + buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */; + compatibilityVersion = "Xcode 12.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE; + productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 3333333333333333333333AA /* ${APP_NAME} */, + ); + }; +/* End PBXProject section */ +EOF + +# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and macOS settings +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXResourcesBuildPhase section */ + 6666666666666666666666AA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 5555555555555555555555AA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 33333333333333333333333 /* ContentView.swift in Sources */, + 11111111111111111111111 /* App.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 9999999999999999999999AA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + 
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 12.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 12.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = macosx; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + }; + name = Release; + }; + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = { + isa = 
XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = ""; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = "MacOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + }; + name = Debug; + }; + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = ""; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + INFOPLIST_FILE = "MacOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ +EOF + +# Finish the project.pbxproj file +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin XCConfigurationList section */ + 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9999999999999999999999AA /* Debug */, + AAAAAAAAAAAAAAAAAAAAABBB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */, + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 7777777777777777777777AA /* Project object */; +} +EOF + +# 2. Copy XCFramework to test project +echo "Copying XCFramework to test project..." +cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/" + +# 3. Build and archive the app +echo "Building and archiving test app..." 
+cd "${TEMP_DIR}/${APP_NAME}" + +# Create a simple xcscheme file to avoid xcodebuild scheme issues +mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes" +cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "1240" + version = "1.3"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + <BuildActionEntries> + <BuildActionEntry + buildForTesting = "YES" + buildForRunning = "YES" + buildForProfiling = "YES" + buildForArchiving = "YES" + buildForAnalyzing = "YES"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildActionEntry> + </BuildActionEntries> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES"> + <Testables> + </Testables> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "NO" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </LaunchAction> + <ProfileAction + buildConfiguration = "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> +EOF + +# Now use xcodebuild with an explicitly defined product name for macOS +xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk macosx -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet + +# 4. Create a package for distribution +echo "Creating distributable package from archive..." 
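+# Unlike iOS there is no IPA container on macOS: the .app bundle is copied
+# straight out of the archive and zipped, which is also the form that
+# `xcrun notarytool submit` accepts.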
+cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${APP_PATH}" + +# Check and log app structure +echo "App structure:" +ls -la "${APP_PATH}" +echo "Frameworks:" +ls -la "${APP_PATH}/Contents/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" + +# Create a zip file for potential distribution +cd "${BUILD_DIR}" +zip -r "${ZIP_PATH}" "${APP_NAME}.app" + +# Check embedded provisioning profile +echo "Checking provisioning profile (if any)..." +PROVISIONING_PROFILE=$(find "${APP_PATH}/Contents" -name "embedded.provisionprofile" 2>/dev/null) +if [ -n "$PROVISIONING_PROFILE" ]; then + echo "Found embedded provisioning profile:" + security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile" +else + echo "No embedded provisioning profile found (expected for ad-hoc builds)" +fi + +# 5. Validate the app +echo "Validating macOS app..." +VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt" + +# Check if authentication credentials are provided +AUTH_ARGS="" +if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then + echo "Using Apple ID authentication for validation..." + AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\"" +else + echo "No authentication credentials provided. Will perform basic validation." + echo "To use your personal developer account, you can run the script with:" + echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-macos.sh" + echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage" +fi + +# For macOS we need to use notarytool or alternative checks because altool doesn't support macOS apps in the same way +echo "Note: For macOS, formal notarization process would require Apple Developer credentials." +echo "Performing alternative validation checks..." 
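+# (Illustrative, not executed here) with a real Developer ID signature one
+# could additionally ask Gatekeeper to assess the bundle:
+#   spctl --assess --type execute --verbose "${APP_PATH}"
+# An unsigned ad-hoc build like this one is expected to fail that check.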
+ +# Final validation result +FINAL_VALIDATION_RESULT=0 + +# Check if app was created successfully +if [ -d "${APP_PATH}" ] && [ -s "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then + echo "✅ App package created successfully" +else + echo "❌ App package not created or binary missing" + FINAL_VALIDATION_RESULT=1 +fi + +# Check if app binary exists and is executable +if [ -f "${APP_PATH}/Contents/MacOS/${APP_NAME}" ] && [ -x "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then + echo "✅ App binary exists and is executable" +else + echo "❌ App binary missing or not executable" + FINAL_VALIDATION_RESULT=1 +fi + +# Check if framework was properly embedded +if [ -d "${APP_PATH}/Contents/Frameworks/llama.framework" ]; then + echo "✅ llama.framework properly embedded" +else + echo "❌ llama.framework not properly embedded" + FINAL_VALIDATION_RESULT=1 +fi + +# Check if framework binary exists +if [ -f "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" ]; then + echo "✅ Framework binary exists" + + # Further validate framework by checking architecture + ARCHS=$(lipo -info "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ') + if [ -n "$ARCHS" ]; then + echo "✅ Framework architecture(s): $ARCHS" + else + echo "⚠️ Could not determine framework architecture" + fi +else + echo "❌ Framework binary missing" + FINAL_VALIDATION_RESULT=1 +fi + +# Check code signing +echo "" +echo "==== CODE SIGNING INFO ====" +codesign -vv -d "${APP_PATH}" 2>&1 || echo "Code signing verification not available (expected for ad-hoc builds)" + +if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + if [ -n "$AUTH_ARGS" ]; then + echo "" + echo "To notarize this app with Apple (requires Apple Developer account):" + echo "xcrun notarytool submit \"${ZIP_PATH}\" --apple-id \"your-apple-id\" --password \"your-app-specific-password\" --team-id \"your-team-id\" --wait" + echo "" + fi + echo "✅ Validation PASSED: macOS app built successfully with embedded framework" +else + echo "❌ Validation FAILED: Issues found with the app or framework" +fi + +# Don't clean up on error to allow inspection +if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then + echo "" + echo "Temporary files kept for inspection at: ${TEMP_DIR}" + echo "===== macOS Validation Process Failed =====" + exit 1 +fi + +# Clean up temporary files but keep build artifacts +if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "Cleaning up temporary files..." 
+ #rm -rf "${TEMP_DIR}" +fi + +echo "===== macOS Validation Process Completed =====" +echo "App package available at: ${APP_PATH}" +echo "Zipped app available at: ${ZIP_PATH}" +exit $FINAL_VALIDATION_RESULT diff --git a/llama.cpp/scripts/apple/validate-tvos.sh b/llama.cpp/scripts/apple/validate-tvos.sh new file mode 100755 index 0000000..b4da698 --- /dev/null +++ b/llama.cpp/scripts/apple/validate-tvos.sh @@ -0,0 +1,813 @@ +#!/usr/bin/env bash +# validate-tvos.sh - Validate tvOS Application with embedded llama.xcframework using SwiftUI + +# Authentication options (optional) (can be set via environment variables) +# To use: export APPLE_ID=your.email@example.com +# export APPLE_PASSWORD=your-app-specific-password +# ./validate-tvos.sh +APPLE_ID=${APPLE_ID:-""} +APPLE_PASSWORD=${APPLE_PASSWORD:-""} + +# Ensure the script exits on error +set -e + +# Function to print usage instructions +print_usage() { + echo "Usage: ./validate-tvos.sh [OPTIONS]" + echo "" + echo "Options:" + echo " --help Show this help message" + echo " --apple-id EMAIL Apple ID email for validation" + echo " --apple-password PWD App-specific password for Apple ID" + echo "" + echo "Environment variables:" + echo " APPLE_ID Apple ID email for validation" + echo " APPLE_PASSWORD App-specific password for Apple ID" + echo "" + echo "Notes:" + echo " - Command line options take precedence over environment variables" + echo " - Authentication is optional. If not provided, alternative validation will be performed" + echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage" +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --help) + print_usage + exit 0 + ;; + --apple-id) + APPLE_ID="$2" + shift 2 + ;; + --apple-password) + APPLE_PASSWORD="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + print_usage + exit 1 + ;; + esac +done + +# Function to clean up in case of error +cleanup() { + # Don't clean up temp files on error to help with debugging + echo "===== tvOS Validation Process Failed =====" + exit 1 +} + +# Set up trap to call cleanup function on error +trap cleanup ERR + +set -e # Exit on any error + +ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )" +BUILD_DIR="${ROOT_DIR}/validation-builds/ios" + +# Configuration +APP_NAME="TVOSLlamaTest" +BUNDLE_ID="org.ggml.TVOSLlamaTest" +XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework" +TEMP_DIR="${BUILD_DIR}/temp" +ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive" +IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa" +VALIDATION_DIR="${BUILD_DIR}/validation" + +# Create necessary directories +mkdir -p "${BUILD_DIR}" +mkdir -p "${TEMP_DIR}" +mkdir -p "${VALIDATION_DIR}" + +echo "===== tvOS Validation Process Started =====" + +# 1. Create a simple test app project +echo "Creating test tvOS app project..." 
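+# The tvOS harness mirrors validate-ios.sh; the differences below are the
+# TVOSLlamaTest bundle identity, an arm64-only UIRequiredDeviceCapabilities
+# entry, and a ContentView sized with larger fonts and padding for TV.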
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + <string>${APP_NAME}</string> + <key>CFBundleIdentifier</key> + <string>${BUNDLE_ID}</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>${APP_NAME}</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> + <key>UIRequiredDeviceCapabilities</key> + <array> + <string>arm64</string> + </array> +</dict> +</plist> +EOF + +# Create SwiftUI app files +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources" + +# Create App.swift +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF +import SwiftUI +import llama + +@main +struct LlamaTestApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} +EOF + +# Create ContentView.swift with tvOS specific elements +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF +import SwiftUI +import llama + +struct ContentView: View { + // Test that we can initialize a llama context params struct + let params = llama_context_default_params() + + var body: some View { + VStack(spacing: 40) { + Text("Llama Framework Test on tvOS") + .font(.largeTitle) + .padding() + + Text("llama_context_default_params() created successfully") + .font(.headline) + .multilineTextAlignment(.center) + .padding() + + // Display some param values to confirm the framework is working + Text("n_ctx: \(params.n_ctx)") + .font(.title2) + + Text("n_batch: \(params.n_batch)") + .font(.title2) + + Spacer() + } + .padding(50) + // Larger size suitable for TV display + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +EOF + +# Create project.pbxproj, fixing the framework search paths issues +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 54; + objects = { + +/* Begin PBXBuildFile section */ + 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; }; + 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; }; + 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 88888888888888888888888 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; }; + 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; + 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; }; +/* End PBXFileReference section */ +EOF + +# Add the rest of the project file with fixed framework search paths +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXFrameworksBuildPhase section */ + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 55555555555555555555555 /* llama.xcframework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = { + isa = PBXGroup; + children = ( + 99999999999999999999999 /* ${APP_NAME}.app */, + ); + name = Products; + sourceTree = "<group>"; + }; +EOF + +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = { + isa = PBXGroup; + children = ( + 66666666666666666666666 /* llama.xcframework */, + ); + name = Frameworks; + sourceTree = "<group>"; + }; + EEEEEEEEEEEEEEEEEEEEEEEE = { + isa = PBXGroup; + children = ( + FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */, + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */, + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */, + ); + sourceTree = "<group>"; + }; + FFFFFFFFFFFFFFFFFFFFFFFF /* 
TVOSLlamaTest */ = { + isa = PBXGroup; + children = ( + 1111111111111111111111AA /* Sources */, + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */, + ); + path = "TVOSLlamaTest"; + sourceTree = "<group>"; + }; + 1111111111111111111111AA /* Sources */ = { + isa = PBXGroup; + children = ( + 22222222222222222222222 /* App.swift */, + 44444444444444444444444 /* ContentView.swift */, + ); + path = Sources; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin PBXNativeTarget section */ + 3333333333333333333333AA /* ${APP_NAME} */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */; + buildPhases = ( + 5555555555555555555555AA /* Sources */, + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */, + 6666666666666666666666AA /* Resources */, + 88888888888888888888888 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "${APP_NAME}"; + productName = "${APP_NAME}"; + productReference = 99999999999999999999999 /* ${APP_NAME}.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 7777777777777777777777AA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1240; + LastUpgradeCheck = 1240; + TargetAttributes = { + 3333333333333333333333AA = { + CreatedOnToolsVersion = 12.4; + }; + }; + }; + buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */; + compatibilityVersion = "Xcode 12.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE; + productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 3333333333333333333333AA /* ${APP_NAME} */, + ); + }; +/* End PBXProject section */ +EOF + +# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and tvOS settings +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXResourcesBuildPhase section */ + 6666666666666666666666AA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 5555555555555555555555AA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 33333333333333333333333 /* ContentView.swift in Sources */, + 11111111111111111111111 /* App.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 9999999999999999999999AA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + 
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + TVOS_DEPLOYMENT_TARGET = 15.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = appletvos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + TVOS_DEPLOYMENT_TARGET = 15.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = appletvos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug 
*/ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = "TVOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = 3; + }; + name = Debug; + }; + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + INFOPLIST_FILE = "TVOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = 3; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ +EOF + +# Finish the project.pbxproj file +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin XCConfigurationList section */ + 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9999999999999999999999AA /* Debug */, + AAAAAAAAAAAAAAAAAAAAABBB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */, + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 7777777777777777777777AA /* Project object */; +} +EOF + +# 2. Copy XCFramework to test project +echo "Copying XCFramework to test project..." +cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/" + +# 3. Build and archive the app +echo "Building and archiving test app..." 
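+# The archive step below builds unsigned (CODE_SIGN_IDENTITY="-",
+# CODE_SIGNING_REQUIRED=NO, CODE_SIGNING_ALLOWED=NO): an unsigned archive is
+# enough for these structural checks, while a real App Store submission
+# would need a signing identity and provisioning profile.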
+cd "${TEMP_DIR}/${APP_NAME}" + +# Create a simple xcscheme file to avoid xcodebuild scheme issues +mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes" +cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "1240" + version = "1.3"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + <BuildActionEntries> + <BuildActionEntry + buildForTesting = "YES" + buildForRunning = "YES" + buildForProfiling = "YES" + buildForArchiving = "YES" + buildForAnalyzing = "YES"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildActionEntry> + </BuildActionEntries> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES"> + <Testables> + </Testables> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "NO" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </LaunchAction> + <ProfileAction + buildConfiguration = "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> +EOF + +# Now use xcodebuild with an explicitly defined product name for tvOS +xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk appletvos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet + +# 4. Create IPA from archive +echo "Creating IPA from archive..." 
+mkdir -p "${TEMP_DIR}/Payload" +cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/" + +# Check and log app structure before zipping +echo "App structure:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/" +echo "Frameworks:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" + +cd "${TEMP_DIR}" +zip -r "${IPA_PATH}" Payload + +# Check embedded provisioning profile +echo "Checking provisioning profile (if any)..." +PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null) +if [ -n "$PROVISIONING_PROFILE" ]; then + echo "Found embedded provisioning profile:" + security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile" +else + echo "No embedded provisioning profile found (expected for ad-hoc builds)" +fi + +# 5. Validate the IPA +echo "Validating IPA..." +VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt" + +# Check if authentication credentials are provided +AUTH_ARGS="" +if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then + echo "Using Apple ID authentication for validation..." + AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\"" +else + echo "No authentication credentials provided. Will perform basic validation." + echo "To use your personal developer account, you can run the script with:" + echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-tvos.sh" + echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage" +fi + +# Run validation with detailed output +echo "Running validation with altool..." +if [ -n "$AUTH_ARGS" ]; then + # Use eval to properly handle the quoted arguments + eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type tvos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}" +else + xcrun altool --validate-app -f "${IPA_PATH}" --type tvos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}" +fi +VALIDATION_RESULT=$? + +# Final validation result +FINAL_VALIDATION_RESULT=0 + +# Check if validation failed because the app isn't in App Store Connect +if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then + echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect" + echo "This is expected for apps that haven't been registered in App Store Connect yet." + echo "This doesn't indicate a problem with the build or framework." + + # Perform alternative validation + echo "Performing alternative validation checks..." 
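+    # These checks approximate what altool would otherwise verify: the IPA
+    # exists and is non-empty, the app binary is executable, and
+    # llama.framework is embedded with a usable architecture slice (lipo).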
+ + # Check if IPA was created successfully + if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then + echo "✅ IPA file created successfully" + else + echo "❌ IPA file not created or empty" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if app binary exists and is executable + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then + echo "✅ App binary exists and is executable" + else + echo "❌ App binary missing or not executable" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework was properly embedded + if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then + echo "✅ llama.framework properly embedded" + else + echo "❌ llama.framework not properly embedded" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework binary exists + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then + echo "✅ Framework binary exists" + + # Further validate framework by checking architecture + ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ') + if [ -n "$ARCHS" ]; then + echo "✅ Framework architecture(s): $ARCHS" + else + echo "⚠️ Could not determine framework architecture" + fi + else + echo "❌ Framework binary missing" + FINAL_VALIDATION_RESULT=1 + fi + + if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "✅ Alternative validation PASSED: App built successfully with embedded framework" + else + echo "❌ Alternative validation FAILED: Issues found with the app or framework" + fi +elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then + echo "✅ tvOS Validation PASSED: IPA successfully validated" + echo "Results saved to ${VALIDATION_OUTPUT}" +else + echo "❌ tvOS Validation FAILED: IPA validation found issues" + echo "See validation output at ${VALIDATION_OUTPUT}" + echo "" + echo "==== VALIDATION ERRORS ====" + + # Try to extract specific errors from the output + if grep -q "Error" "${VALIDATION_OUTPUT}"; then + grep -A 5 "Error" "${VALIDATION_OUTPUT}" + else + # If no specific error found, show the whole log + cat "${VALIDATION_OUTPUT}" + fi + + # Additional debugging: check IPA contents + echo "" + echo "==== IPA CONTENTS ====" + mkdir -p "${TEMP_DIR}/ipa_contents" + unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/" + + # Check for code signing issues + echo "" + echo "==== CODE SIGNING INFO ====" + codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed" + + # Check embedded frameworks + echo "" + echo "==== FRAMEWORK INFO ====" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" +fi + +# Don't clean up on error to allow inspection +if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then + echo "" + echo "Temporary files kept for inspection at: ${TEMP_DIR}" + echo "===== tvOS Validation Process Failed =====" + exit 1 +fi + +# Clean up temporary files but keep build artifacts +if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "Cleaning up temporary files..." 
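+    # (The rm below is commented out, so temporary files are kept even on
+    # success; uncomment it to actually remove them.)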
+ #rm -rf "${TEMP_DIR}" +fi + +echo "===== tvOS Validation Process Completed =====" +exit $FINAL_VALIDATION_RESULT diff --git a/llama.cpp/scripts/apple/validate-visionos.sh b/llama.cpp/scripts/apple/validate-visionos.sh new file mode 100755 index 0000000..bbdec66 --- /dev/null +++ b/llama.cpp/scripts/apple/validate-visionos.sh @@ -0,0 +1,811 @@ +#!/usr/bin/env bash +# validate-visionos.sh - Validate visionOS Application with embedded llama.xcframework using SwiftUI + +# Authentication options (optional) (can be set via environment variables) +# To use: export APPLE_ID=your.email@example.com +# export APPLE_PASSWORD=your-app-specific-password +# ./validate-visionos.sh +APPLE_ID=${APPLE_ID:-""} +APPLE_PASSWORD=${APPLE_PASSWORD:-""} + +# Ensure the script exits on error +set -e + +# Function to print usage instructions +print_usage() { + echo "Usage: ./validate-visionos.sh [OPTIONS]" + echo "" + echo "Options:" + echo " --help Show this help message" + echo " --apple-id EMAIL Apple ID email for validation" + echo " --apple-password PWD App-specific password for Apple ID" + echo "" + echo "Environment variables:" + echo " APPLE_ID Apple ID email for validation" + echo " APPLE_PASSWORD App-specific password for Apple ID" + echo "" + echo "Notes:" + echo " - Command line options take precedence over environment variables" + echo " - Authentication is optional. If not provided, alternative validation will be performed" + echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage" +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + --help) + print_usage + exit 0 + ;; + --apple-id) + APPLE_ID="$2" + shift 2 + ;; + --apple-password) + APPLE_PASSWORD="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + print_usage + exit 1 + ;; + esac +done + +# Function to clean up in case of error +cleanup() { + # Don't clean up temp files on error to help with debugging + echo "===== visionOS Validation Process Failed =====" + exit 1 +} + +# Set up trap to call cleanup function on error +trap cleanup ERR + +set -e # Exit on any error + +ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )" +BUILD_DIR="${ROOT_DIR}/validation-builds/visionos" + +# Configuration +APP_NAME="VisionOSLlamaTest" +BUNDLE_ID="org.ggml.VisionOSLlamaTest" +XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework" +TEMP_DIR="${BUILD_DIR}/temp" +ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive" +IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa" +VALIDATION_DIR="${BUILD_DIR}/validation" + +# Create necessary directories +mkdir -p "${BUILD_DIR}" +mkdir -p "${TEMP_DIR}" +mkdir -p "${VALIDATION_DIR}" + +echo "===== visionOS Validation Process Started =====" + +# 1. Create a simple test app project +echo "Creating test visionOS app project..." 
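+# The steps below mirror the iOS/tvOS validation scripts; only the bundle
+# metadata, the SDK (xros) and the deployment settings differ.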
+mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>en</string> + <key>CFBundleExecutable</key> + <string>${APP_NAME}</string> + <key>CFBundleIdentifier</key> + <string>${BUNDLE_ID}</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>${APP_NAME}</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleVersion</key> + <string>1</string> +</dict> +</plist> +EOF + +# Create SwiftUI app files +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources" + +# Create App.swift +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF +import SwiftUI +import llama + +@main +struct LlamaTestApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} +EOF + +# Create ContentView.swift with visionOS specific elements +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF +import SwiftUI +import llama + +struct ContentView: View { + // Test that we can initialize a llama context params struct + let params = llama_context_default_params() + + var body: some View { + VStack(spacing: 20) { + Text("Llama Framework Test on visionOS") + .font(.largeTitle) + .padding() + + Text("llama_context_default_params() created successfully") + .font(.headline) + .multilineTextAlignment(.center) + .padding() + + // Display some param values to confirm the framework is working + Text("n_ctx: \(params.n_ctx)") + .font(.body) + + Text("n_batch: \(params.n_batch)") + .font(.body) + + Spacer() + } + .padding() + .frame(width: 500, height: 400) + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +EOF + +# Create project.pbxproj, fixing the framework search paths issues +mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj" +cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 54; + objects = { + +/* Begin PBXBuildFile section */ + 11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; }; + 33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; }; + 55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 88888888888888888888888 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 77777777777777777777777 /* llama.xcframework in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + 99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; }; + 44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; }; + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; + 66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; }; +/* End PBXFileReference section */ +EOF + +# Add the rest of the project file with fixed framework search paths +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXFrameworksBuildPhase section */ + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 55555555555555555555555 /* llama.xcframework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = { + isa = PBXGroup; + children = ( + 99999999999999999999999 /* ${APP_NAME}.app */, + ); + name = Products; + sourceTree = "<group>"; + }; +EOF + +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = { + isa = PBXGroup; + children = ( + 66666666666666666666666 /* llama.xcframework */, + ); + name = Frameworks; + sourceTree = "<group>"; + }; + EEEEEEEEEEEEEEEEEEEEEEEE = { + isa = PBXGroup; + children = ( + FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */, + CCCCCCCCCCCCCCCCCCCCCCCC /* Products */, + DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */, + ); + sourceTree = "<group>"; + }; + FFFFFFFFFFFFFFFFFFFFFFFF 
/* VisionOSLlamaTest */ = { + isa = PBXGroup; + children = ( + 1111111111111111111111AA /* Sources */, + AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */, + ); + path = "VisionOSLlamaTest"; + sourceTree = "<group>"; + }; + 1111111111111111111111AA /* Sources */ = { + isa = PBXGroup; + children = ( + 22222222222222222222222 /* App.swift */, + 44444444444444444444444 /* ContentView.swift */, + ); + path = Sources; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ +EOF + +# Continue with the project.pbxproj file, using the APP_NAME variable appropriately +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin PBXNativeTarget section */ + 3333333333333333333333AA /* ${APP_NAME} */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */; + buildPhases = ( + 5555555555555555555555AA /* Sources */, + BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */, + 6666666666666666666666AA /* Resources */, + 88888888888888888888888 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "${APP_NAME}"; + productName = "${APP_NAME}"; + productReference = 99999999999999999999999 /* ${APP_NAME}.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 7777777777777777777777AA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1510; + LastUpgradeCheck = 1510; + TargetAttributes = { + 3333333333333333333333AA = { + CreatedOnToolsVersion = 15.1; + }; + }; + }; + buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */; + compatibilityVersion = "Xcode 15.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE; + productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 3333333333333333333333AA /* ${APP_NAME} */, + ); + }; +/* End PBXProject section */ +EOF + +# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF' +/* Begin PBXResourcesBuildPhase section */ + 6666666666666666666666AA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 5555555555555555555555AA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 33333333333333333333333 /* ContentView.swift in Sources */, + 11111111111111111111111 /* App.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 9999999999999999999999AA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + 
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = xros; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + XROS_DEPLOYMENT_TARGET = 1.0; + }; + name = Debug; + }; + AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = xros; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + XROS_DEPLOYMENT_TARGET = 1.0; + }; + name = Release; + }; + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = { + isa 
= XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SUPPORTED_PLATFORMS = "xros xrsimulator"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2,7"; + }; + name = Debug; + }; + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = ""; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + SUPPORTED_PLATFORMS = "xros xrsimulator"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2,7"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ +EOF + +# Finish the project.pbxproj file +cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF +/* Begin XCConfigurationList section */ + 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9999999999999999999999AA /* Debug */, + AAAAAAAAAAAAAAAAAAAAABBB /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */, + CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 7777777777777777777777AA /* Project object */; +} +EOF + +# 2. Copy XCFramework to test project +echo "Copying XCFramework to test project..." +cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/" + +# 3. Build and archive the app +echo "Building and archiving test app..." 
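+# As in the tvOS script, archiving is done unsigned; the visionOS build
+# additionally targets the xros SDK via -sdk xros below.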
+cd "${TEMP_DIR}/${APP_NAME}" + +# Create a simple xcscheme file to avoid xcodebuild scheme issues +mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes" +cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF +<?xml version="1.0" encoding="UTF-8"?> +<Scheme + LastUpgradeVersion = "1510" + version = "1.3"> + <BuildAction + parallelizeBuildables = "YES" + buildImplicitDependencies = "YES"> + <BuildActionEntries> + <BuildActionEntry + buildForTesting = "YES" + buildForRunning = "YES" + buildForProfiling = "YES" + buildForArchiving = "YES" + buildForAnalyzing = "YES"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildActionEntry> + </BuildActionEntries> + </BuildAction> + <TestAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + shouldUseLaunchSchemeArgsEnv = "YES"> + <Testables> + </Testables> + </TestAction> + <LaunchAction + buildConfiguration = "Debug" + selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" + selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" + launchStyle = "0" + useCustomWorkingDirectory = "NO" + ignoresPersistentStateOnLaunch = "NO" + debugDocumentVersioning = "YES" + debugServiceExtension = "internal" + allowLocationSimulation = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </LaunchAction> + <ProfileAction + buildConfiguration = "Release" + shouldUseLaunchSchemeArgsEnv = "YES" + savedToolIdentifier = "" + useCustomWorkingDirectory = "NO" + debugDocumentVersioning = "YES"> + <BuildableProductRunnable + runnableDebuggingMode = "0"> + <BuildableReference + BuildableIdentifier = "primary" + BlueprintIdentifier = "3333333333333333333333AA" + BuildableName = "${APP_NAME}.app" + BlueprintName = "${APP_NAME}" + ReferencedContainer = "container:${APP_NAME}.xcodeproj"> + </BuildableReference> + </BuildableProductRunnable> + </ProfileAction> + <AnalyzeAction + buildConfiguration = "Debug"> + </AnalyzeAction> + <ArchiveAction + buildConfiguration = "Release" + revealArchiveInOrganizer = "YES"> + </ArchiveAction> +</Scheme> +EOF + +# Now use xcodebuild with an explicitly defined product name for visionOS +xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk xros -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet + +# 4. Create IPA from archive +echo "Creating IPA from archive..." 
+mkdir -p "${TEMP_DIR}/Payload" +cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/" + +# Check and log app structure before zipping +echo "App structure:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/" +echo "Frameworks:" +ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" + +cd "${TEMP_DIR}" +zip -r "${IPA_PATH}" Payload + +# Check embedded provisioning profile +echo "Checking provisioning profile (if any)..." +PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null) +if [ -n "$PROVISIONING_PROFILE" ]; then + echo "Found embedded provisioning profile:" + security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile" +else + echo "No embedded provisioning profile found (expected for ad-hoc builds)" +fi + +# 5. Validate the IPA +echo "Validating IPA..." +VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt" + +# Check if authentication credentials are provided +AUTH_ARGS="" +if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then + echo "Using Apple ID authentication for validation..." + AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\"" +else + echo "No authentication credentials provided. Will perform basic validation." + echo "To use your personal developer account, you can run the script with:" + echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-visionos.sh" + echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage" +fi + +# Run validation with detailed output +echo "Running validation with altool..." +if [ -n "$AUTH_ARGS" ]; then + # Use eval to properly handle the quoted arguments + eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type visionos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}" +else + xcrun altool --validate-app -f "${IPA_PATH}" --type visionos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}" +fi +VALIDATION_RESULT=$? + +# Final validation result +FINAL_VALIDATION_RESULT=0 + +# Check if validation failed because the app isn't in App Store Connect +if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then + echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect" + echo "This is expected for apps that haven't been registered in App Store Connect yet." + echo "This doesn't indicate a problem with the build or framework." + + # Perform alternative validation + echo "Performing alternative validation checks..." 
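+    # Same structural checks as in the tvOS script: IPA present, app binary
+    # executable, llama.framework embedded with a valid architecture slice.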
+ + # Check if IPA was created successfully + if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then + echo "✅ IPA file created successfully" + else + echo "❌ IPA file not created or empty" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if app binary exists and is executable + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then + echo "✅ App binary exists and is executable" + else + echo "❌ App binary missing or not executable" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework was properly embedded + if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then + echo "✅ llama.framework properly embedded" + else + echo "❌ llama.framework not properly embedded" + FINAL_VALIDATION_RESULT=1 + fi + + # Check if framework binary exists + if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then + echo "✅ Framework binary exists" + + # Further validate framework by checking architecture + ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ') + if [ -n "$ARCHS" ]; then + echo "✅ Framework architecture(s): $ARCHS" + else + echo "⚠️ Could not determine framework architecture" + fi + else + echo "❌ Framework binary missing" + FINAL_VALIDATION_RESULT=1 + fi + + if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "✅ Alternative validation PASSED: App built successfully with embedded framework" + else + echo "❌ Alternative validation FAILED: Issues found with the app or framework" + fi +elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then + echo "✅ visionOS Validation PASSED: IPA successfully validated" + echo "Results saved to ${VALIDATION_OUTPUT}" +else + echo "❌ visionOS Validation FAILED: IPA validation found issues" + echo "See validation output at ${VALIDATION_OUTPUT}" + echo "" + echo "==== VALIDATION ERRORS ====" + + # Try to extract specific errors from the output + if grep -q "Error" "${VALIDATION_OUTPUT}"; then + grep -A 5 "Error" "${VALIDATION_OUTPUT}" + else + # If no specific error found, show the whole log + cat "${VALIDATION_OUTPUT}" + fi + + # Additional debugging: check IPA contents + echo "" + echo "==== IPA CONTENTS ====" + mkdir -p "${TEMP_DIR}/ipa_contents" + unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/" + + # Check for code signing issues + echo "" + echo "==== CODE SIGNING INFO ====" + codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed" + + # Check embedded frameworks + echo "" + echo "==== FRAMEWORK INFO ====" + ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found" +fi + +# Don't clean up on error to allow inspection +if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then + echo "" + echo "Temporary files kept for inspection at: ${TEMP_DIR}" + echo "===== visionOS Validation Process Failed =====" + exit 1 +fi + +# Clean up temporary files but keep build artifacts +if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then + echo "Cleaning up temporary files..." 
+ #rm -rf "${TEMP_DIR}" +fi + +echo "===== visionOS Validation Process Completed =====" +exit $FINAL_VALIDATION_RESULT diff --git a/llama.cpp/scripts/bench-models.sh b/llama.cpp/scripts/bench-models.sh new file mode 100755 index 0000000..c241013 --- /dev/null +++ b/llama.cpp/scripts/bench-models.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +RESULTS="bench-models-results.txt" +: > "$RESULTS" + +ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32" +ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32" + +QUICK=0 +DIO=0 +while (( "$#" )); do + case "$1" in + --quick) QUICK=1; shift ;; + --dio) DIO=1; shift ;; + *) shift ;; + esac +done + +if (( QUICK )); then + ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32" + ARGS_B="-d 0 -p 2048 -n 32" +fi + +if (( DIO )); then + ARGS_BB="${ARGS_BB} --no-mmap --direct-io" + ARGS_B="${ARGS_B} -mmp 0 -dio 1" +fi + +run_model() { + local HFR=$1 + local HFF=$2 + + printf "## ${HFR}\n" | tee -a "$RESULTS" + printf "\n" | tee -a "$RESULTS" + printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS" + printf "\n" | tee -a "$RESULTS" + + printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS" + printf "\n" | tee -a "$RESULTS" + + ./bin/llama-batched-bench \ + -hfr "${HFR}" -hff "${HFF}" \ + -m "${HFF}" -fa 1 -ub 2048 \ + ${ARGS_BB} | tee -a "$RESULTS" + + printf "\n" | tee -a "$RESULTS" + + printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS" + printf "\n" | tee -a "$RESULTS" + + ./bin/llama-bench \ + -m "${HFF}" -fa 1 -ub 2048 \ + ${ARGS_B} | tee -a "$RESULTS" + + printf "\n" | tee -a "$RESULTS" + + printf "\n" +} + +run_model "ggml-org/gpt-oss-20b-GGUF" "gpt-oss-20b-mxfp4.gguf" +run_model "ggml-org/gpt-oss-120b-GGUF" "gpt-oss-120b-mxfp4-00001-of-00003.gguf" +run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf" +run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF" "qwen2.5-coder-7b-q8_0.gguf" +run_model "ggml-org/gemma-3-4b-it-qat-GGUF" "gemma-3-4b-it-qat-Q4_0.gguf" +run_model "ggml-org/GLM-4.7-Flash-GGUF" "GLM-4.7-Flash-Q8_0.gguf" + +if [[ -f models-extra.txt ]]; then + while read -r HFR HFF; do + [[ -z "$HFR" ]] && continue + run_model "$HFR" "$HFF" + done < models-extra.txt +fi + +printf "\n=====================================\n" +printf "\n" + +cat "$RESULTS" + +printf "\n" +printf "Done! 
Results are written to $RESULTS\n" +printf "\n" + diff --git a/llama.cpp/scripts/build-info.sh b/llama.cpp/scripts/build-info.sh new file mode 100755 index 0000000..fa9e7ba --- /dev/null +++ b/llama.cpp/scripts/build-info.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +CC=$1 + +build_number="0" +build_commit="unknown" +build_compiler="unknown" +build_target="unknown" + +if out=$(git rev-list --count HEAD); then + # git is broken on WSL so we need to strip extra newlines + build_number=$(printf '%s' "$out" | tr -d '\n') +fi + +if out=$(git rev-parse --short HEAD); then + build_commit=$(printf '%s' "$out" | tr -d '\n') +fi + +if out=$($CC --version | head -1); then + build_compiler=$out +fi + +if out=$($CC -dumpmachine); then + build_target=$out +fi + +echo "int LLAMA_BUILD_NUMBER = ${build_number};" +echo "char const *LLAMA_COMMIT = \"${build_commit}\";" +echo "char const *LLAMA_COMPILER = \"${build_compiler}\";" +echo "char const *LLAMA_BUILD_TARGET = \"${build_target}\";" diff --git a/llama.cpp/scripts/check-requirements.sh b/llama.cpp/scripts/check-requirements.sh new file mode 100755 index 0000000..da2357d --- /dev/null +++ b/llama.cpp/scripts/check-requirements.sh @@ -0,0 +1,179 @@ +#!/usr/bin/env bash +set -euo pipefail + +# +# check-requirements.sh checks all requirements files for each top-level +# convert*.py script. +# +# WARNING: This is quite IO intensive, because a fresh venv is set up for every +# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately +# sized tmpfs /tmp or ramdisk is recommended if running this frequently. +# +# usage: check-requirements.sh [<working_dir>] +# check-requirements.sh nocleanup [<working_dir>] +# +# where: +# - <working_dir> is a directory that can be used as the base for +# setting up the venvs. Defaults to `/tmp`. +# - 'nocleanup' as the first argument will disable automatic cleanup +# of the files created by this script. +# +# requires: +# - bash >= 3.2.57 +# - shellcheck +# +# For each script, it creates a fresh venv, `pip install`s the requirements, and +# finally imports the python script to check for `ImportError`. +# + +log() { + local level=$1 msg=$2 + printf >&2 '%s: %s\n' "$level" "$msg" +} + +debug() { + log DEBUG "$@" +} + +info() { + log INFO "$@" +} + +fatal() { + log FATAL "$@" + exit 1 +} + +cleanup() { + if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then + info "Removing $workdir" + local count=0 + rm -rfv -- "$workdir" | while read -r; do + if (( count++ > 750 )); then + printf . + count=0 + fi + done + printf '\n' + info "Removed $workdir" + fi +} + +do_cleanup=1 +if [[ ${1-} == nocleanup ]]; then + do_cleanup=0; shift +fi + +if (( do_cleanup )); then + trap exit INT TERM + trap cleanup EXIT +fi + +this=$(realpath -- "$0"); readonly this +cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory + +shellcheck "$this" + +readonly reqs_dir=requirements + +if [[ ${1+x} ]]; then + tmp_dir=$(realpath -- "$1") + if [[ ! ( -d $tmp_dir && -w $tmp_dir ) ]]; then + fatal "$tmp_dir is not a writable directory" + fi +else + tmp_dir=/tmp +fi + +workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir +info "Working directory: $workdir" + +check_requirements() { + local reqs=$1 + + info "$reqs: beginning check" + pip --disable-pip-version-check install -qr "$reqs" + info "$reqs: OK" +} + +check_convert_script() { + local py=$1 # e.g. ./convert_hf_to_gguf.py + local pyname=${py##*/} # e.g. convert_hf_to_gguf.py + pyname=${pyname%.py} # e.g. 
convert_hf_to_gguf + + info "$py: beginning check" + + local reqs="$reqs_dir/requirements-$pyname.txt" + if [[ ! -r $reqs ]]; then + fatal "$py missing requirements. Expected: $reqs" + fi + + # Check that all sub-requirements are added to top-level requirements.txt + if ! grep -qF "$reqs" requirements.txt; then + fatal "$reqs needs to be added to requirements.txt" + fi + + local venv="$workdir/$pyname-venv" + python3 -m venv "$venv" + + ( + # shellcheck source=/dev/null + source "$venv/bin/activate" + + check_requirements "$reqs" + + python - "$py" "$pyname" <<'EOF' +import sys +from importlib.machinery import SourceFileLoader +py, pyname = sys.argv[1:] +SourceFileLoader(pyname, py).load_module() +EOF + ) + + if (( do_cleanup )); then + rm -rf -- "$venv" + fi + + info "$py: imports OK" +} + +readonly ignore_eq_eq='check_requirements: ignore "=="' + +for req in */**/requirements*.txt; do + # Make sure exact release versions aren't being pinned in the requirements + # Filters out the ignore string + if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then + tab=$'\t' + cat >&2 <<EOF +FATAL: Avoid pinning exact package versions. Use '~=' instead. +You can suppress this error by appending the following to the line: +$tab# $ignore_eq_eq +EOF + exit 1 + fi +done + +all_venv="$workdir/all-venv" +python3 -m venv "$all_venv" + +( + # shellcheck source=/dev/null + source "$all_venv/bin/activate" + check_requirements requirements.txt +) + +if (( do_cleanup )); then + rm -rf -- "$all_venv" +fi + +check_convert_script examples/convert_legacy_llama.py +for py in convert_*.py; do + # skip convert_hf_to_gguf_update.py + # TODO: the check is failing for some reason: + # https://github.com/ggml-org/llama.cpp/actions/runs/8875330981/job/24364557177?pr=6920 + [[ $py == convert_hf_to_gguf_update.py ]] && continue + + check_convert_script "$py" +done + +info 'Done! No issues found.' diff --git a/llama.cpp/scripts/compare-commits.sh b/llama.cpp/scripts/compare-commits.sh new file mode 100755 index 0000000..1802d6e --- /dev/null +++ b/llama.cpp/scripts/compare-commits.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +if [ $# -lt 2 ]; then + echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [tool] [additional arguments]" + echo " tool: 'llama-bench' (default) or 'test-backend-ops'" + echo " additional arguments: passed to the selected tool" + exit 1 +fi + +set -e +set -x + +# Parse arguments +commit1=$1 +commit2=$2 +tool=${3:-llama-bench} +additional_args="${@:4}" + +# Validate tool argument +if [ "$tool" != "llama-bench" ] && [ "$tool" != "test-backend-ops" ]; then + echo "Error: tool must be 'llama-bench' or 'test-backend-ops'" + exit 1 +fi + +# verify at the start that the compare script has all the necessary dependencies installed +./scripts/compare-llama-bench.py --check + +if ! command -v sqlite3 >/dev/null 2>&1; then + echo "Error: sqlite3 is not installed or not in PATH" + echo "Please install sqlite3 to use this script" + exit 1 +fi + +if [ "$tool" = "llama-bench" ]; then + db_file="llama-bench.sqlite" + target="llama-bench" + run_args="-o sql -oe md $additional_args" +else # test-backend-ops + db_file="test-backend-ops.sqlite" + target="test-backend-ops" + run_args="perf --output sql $additional_args" +fi + +rm -f "$db_file" > /dev/null + +# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits.sh ...) 
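+# example invocation (branch names and model path are placeholders):
+#   GGML_CUDA=1 ./scripts/compare-commits.sh master my-branch llama-bench -m models/7B/ggml-model-q4_0.gguf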
+if [ -n "$GGML_CUDA" ]; then + CMAKE_OPTS="${CMAKE_OPTS} -DGGML_CUDA=ON" +fi + +dir="build-bench" + +function run { + rm -fr ${dir} > /dev/null + cmake -B ${dir} -S . ${CMAKE_OPTS} > /dev/null + cmake --build ${dir} -t $target -j $(nproc) > /dev/null + ${dir}/bin/$target $run_args | sqlite3 "$db_file" +} + +git checkout $commit1 > /dev/null +run + +git checkout $commit2 > /dev/null +run + +./scripts/compare-llama-bench.py -b $commit1 -c $commit2 --tool $tool -i "$db_file" diff --git a/llama.cpp/scripts/compare-llama-bench.py b/llama.cpp/scripts/compare-llama-bench.py new file mode 100755 index 0000000..9541b89 --- /dev/null +++ b/llama.cpp/scripts/compare-llama-bench.py @@ -0,0 +1,1093 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import heapq +import json +import logging +import os +import sqlite3 +import sys +from collections.abc import Iterator, Sequence +from glob import glob +from typing import Any, Optional, Union + +try: + import git + from tabulate import tabulate +except ImportError as e: + print("the following Python libraries are required: GitPython, tabulate.") # noqa: NP100 + raise e + + +logger = logging.getLogger("compare-llama-bench") + +# All llama-bench SQL fields +LLAMA_BENCH_DB_FIELDS = [ + "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename", + "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads", + "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers", + "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides", + "use_mmap", "embeddings", "no_op_offload", "n_prompt", "n_gen", "n_depth", + "test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts", "n_cpu_moe" +] + +LLAMA_BENCH_DB_TYPES = [ + "TEXT", "INTEGER", "TEXT", "TEXT", "TEXT", "TEXT", + "TEXT", "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", + "TEXT", "INTEGER", "INTEGER", "TEXT", "TEXT", "INTEGER", + "TEXT", "INTEGER", "INTEGER", "INTEGER", "TEXT", "TEXT", + "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", "INTEGER", + "TEXT", "INTEGER", "INTEGER", "REAL", "REAL", "INTEGER", +] + +# All test-backend-ops SQL fields +TEST_BACKEND_OPS_DB_FIELDS = [ + "test_time", "build_commit", "backend_name", "op_name", "op_params", "test_mode", + "supported", "passed", "error_message", "time_us", "flops", "bandwidth_gb_s", + "memory_kb", "n_runs" +] + +TEST_BACKEND_OPS_DB_TYPES = [ + "TEXT", "TEXT", "TEXT", "TEXT", "TEXT", "TEXT", + "INTEGER", "INTEGER", "TEXT", "REAL", "REAL", "REAL", + "INTEGER", "INTEGER" +] + +assert len(LLAMA_BENCH_DB_FIELDS) == len(LLAMA_BENCH_DB_TYPES) +assert len(TEST_BACKEND_OPS_DB_FIELDS) == len(TEST_BACKEND_OPS_DB_TYPES) + +# Properties by which to differentiate results per commit for llama-bench: +LLAMA_BENCH_KEY_PROPERTIES = [ + "cpu_info", "gpu_info", "backends", "n_gpu_layers", "n_cpu_moe", "tensor_buft_overrides", "model_filename", "model_type", + "n_batch", "n_ubatch", "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v", + "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth" +] + +# Properties by which to differentiate results per commit for test-backend-ops: +TEST_BACKEND_OPS_KEY_PROPERTIES = [ + "backend_name", "op_name", "op_params", "test_mode" +] + +# Properties that are boolean and are converted to Yes/No for the table: +LLAMA_BENCH_BOOL_PROPERTIES = ["embeddings", "cpu_strict", "use_mmap", "no_kv_offload", "flash_attn"] +TEST_BACKEND_OPS_BOOL_PROPERTIES = 
["supported", "passed"] + +# Header names for the table (llama-bench): +LLAMA_BENCH_PRETTY_NAMES = { + "cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers", + "tensor_buft_overrides": "Tensor overrides", "model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]", + "model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size", "embeddings": "Embeddings", + "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll", "n_threads": "Threads", "type_k": "K type", "type_v": "V type", + "use_mmap": "Use mmap", "no_kv_offload": "NKVO", "split_mode": "Split mode", "main_gpu": "Main GPU", "tensor_split": "Tensor split", + "flash_attn": "FlashAttention", +} + +# Header names for the table (test-backend-ops): +TEST_BACKEND_OPS_PRETTY_NAMES = { + "backend_name": "Backend", "op_name": "GGML op", "op_params": "Op parameters", "test_mode": "Mode", + "supported": "Supported", "passed": "Passed", "error_message": "Error", + "flops": "FLOPS", "bandwidth_gb_s": "Bandwidth (GB/s)", "memory_kb": "Memory (KB)", "n_runs": "Runs" +} + +DEFAULT_SHOW_LLAMA_BENCH = ["model_type"] # Always show these properties by default. +DEFAULT_HIDE_LLAMA_BENCH = ["model_filename"] # Always hide these properties by default. + +DEFAULT_SHOW_TEST_BACKEND_OPS = ["backend_name", "op_name"] # Always show these properties by default. +DEFAULT_HIDE_TEST_BACKEND_OPS = ["error_message"] # Always hide these properties by default. + +GPU_NAME_STRIP = ["NVIDIA GeForce ", "Tesla ", "AMD Radeon ", "AMD Instinct "] # Strip prefixes for smaller tables. +MODEL_SUFFIX_REPLACE = {" - Small": "_S", " - Medium": "_M", " - Large": "_L"} + +DESCRIPTION = """Creates tables from llama-bench or test-backend-ops data written to multiple JSON/CSV files, a single JSONL file or SQLite database. Example usage (Linux): + +For llama-bench: +$ git checkout master +$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc) +$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite +$ git checkout some_branch +$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc) +$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite +$ ./scripts/compare-llama-bench.py + +For test-backend-ops: +$ git checkout master +$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc) +$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite +$ git checkout some_branch +$ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc) +$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite +$ ./scripts/compare-llama-bench.py --tool test-backend-ops -i test-backend-ops.sqlite + +Performance numbers from multiple runs per commit are averaged WITHOUT being weighted by the --repetitions parameter of llama-bench. +""" + +parser = argparse.ArgumentParser( + description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter) +help_b = ( + "The baseline commit to compare performance to. " + "Accepts either a branch name, tag name, or commit hash. " + "Defaults to latest master commit with data." +) +parser.add_argument("-b", "--baseline", help=help_b) +help_c = ( + "The commit whose performance is to be compared to the baseline. " + "Accepts either a branch name, tag name, or commit hash. " + "Defaults to the non-master commit for which llama-bench was run most recently." 
+) +parser.add_argument("-c", "--compare", help=help_c) +help_t = ( + "The tool whose data is being compared. " + "Either 'llama-bench' or 'test-backend-ops'. " + "This determines the database schema and comparison logic used. " + "If left unspecified, try to determine from the input file." +) +parser.add_argument("-t", "--tool", help=help_t, default=None, choices=[None, "llama-bench", "test-backend-ops"]) +help_i = ( + "JSON/JSONL/SQLite/CSV files for comparing commits. " + "Specify multiple times to use multiple input files (JSON/CSV only). " + "Defaults to 'llama-bench.sqlite' in the current working directory. " + "If no such file is found and there is exactly one .sqlite file in the current directory, " + "that file is instead used as input." +) +parser.add_argument("-i", "--input", action="append", help=help_i) +help_o = ( + "Output format for the table. " + "Defaults to 'pipe' (GitHub compatible). " + "Also supports e.g. 'latex' or 'mediawiki'. " + "See tabulate documentation for full list." +) +parser.add_argument("-o", "--output", help=help_o, default="pipe") +help_s = ( + "Columns to add to the table. " + "Accepts a comma-separated list of values. " + f"Legal values for test-backend-ops: {', '.join(TEST_BACKEND_OPS_KEY_PROPERTIES)}. " + f"Legal values for llama-bench: {', '.join(LLAMA_BENCH_KEY_PROPERTIES[:-3])}. " + "Defaults to model name (model_type) and CPU and/or GPU name (cpu_info, gpu_info) " + "plus any column where not all data points are the same. " + "If the columns are manually specified, then the results for each unique combination of the " + "specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench." +) +parser.add_argument("--check", action="store_true", help="check if all required Python libraries are installed") +parser.add_argument("-s", "--show", help=help_s) +parser.add_argument("--verbose", action="store_true", help="increase output verbosity") +parser.add_argument("--plot", help="generate a performance comparison plot and save to specified file (e.g., plot.png)") +parser.add_argument("--plot_x", help="parameter to use as x axis for plotting (default: n_depth)", default="n_depth") +parser.add_argument("--plot_log_scale", action="store_true", help="use log scale for x axis in plots (off by default)") + +known_args, unknown_args = parser.parse_known_args() + +logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO) + + +if known_args.check: + # Check if all required Python libraries are installed. Would have failed earlier if not. 
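+    # Reaching this point means the GitPython and tabulate imports at the top of
+    # this file succeeded, which is the entire check.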
+    sys.exit(0)
+
+if unknown_args:
+    logger.error(f"Received unknown args: {unknown_args}.\n")
+    parser.print_help()
+    sys.exit(1)
+
+input_file = known_args.input
+tool = known_args.tool
+
+if not input_file:
+    if tool == "llama-bench" and os.path.exists("./llama-bench.sqlite"):
+        input_file = ["llama-bench.sqlite"]
+    elif tool == "test-backend-ops" and os.path.exists("./test-backend-ops.sqlite"):
+        input_file = ["test-backend-ops.sqlite"]
+
+if not input_file:
+    sqlite_files = glob("*.sqlite")
+    if len(sqlite_files) == 1:
+        input_file = sqlite_files
+
+if not input_file:
+    logger.error("Cannot find a suitable input file, please provide one.\n")
+    parser.print_help()
+    sys.exit(1)
+
+
+class LlamaBenchData:
+    repo: Optional[git.Repo]
+    build_len_min: int
+    build_len_max: int
+    build_len: int = 8
+    builds: list[str] = []
+    tool: str = "llama-bench"  # Tool type: "llama-bench" or "test-backend-ops"
+
+    def __init__(self, tool: str = "llama-bench"):
+        self.tool = tool
+        try:
+            self.repo = git.Repo(".", search_parent_directories=True)
+        except git.InvalidGitRepositoryError:
+            self.repo = None
+
+        # Set schema-specific properties based on tool
+        if self.tool == "llama-bench":
+            self.check_keys = set(LLAMA_BENCH_KEY_PROPERTIES + ["build_commit", "test_time", "avg_ts"])
+        elif self.tool == "test-backend-ops":
+            self.check_keys = set(TEST_BACKEND_OPS_KEY_PROPERTIES + ["build_commit", "test_time"])
+        else:
+            assert False
+
+    def _builds_init(self):
+        self.build_len = self.build_len_min
+
+    def _check_keys(self, keys: set) -> Optional[set]:
+        """Private helper method that checks against required data keys and returns missing ones."""
+        if not keys >= self.check_keys:
+            return self.check_keys - keys
+        return None
+
+    def find_parent_in_data(self, commit: git.Commit) -> Optional[str]:
+        """Helper method to find the most recent parent measured in number of commits for which there is data."""
+        heap: list[tuple[int, git.Commit]] = [(0, commit)]
+        seen_hexsha8 = set()
+        while heap:
+            depth, current_commit = heapq.heappop(heap)
+            current_hexsha8 = current_commit.hexsha[:self.build_len]
+            if current_hexsha8 in self.builds:
+                return current_hexsha8
+            for parent in current_commit.parents:
+                parent_hexsha8 = parent.hexsha[:self.build_len]
+                if parent_hexsha8 not in seen_hexsha8:
+                    seen_hexsha8.add(parent_hexsha8)
+                    heapq.heappush(heap, (depth + 1, parent))
+        return None
+
+    def get_all_parent_hexsha8s(self, commit: git.Commit) -> Sequence[str]:
+        """Helper method to recursively get hexsha8 values for all parents of a commit."""
+        unvisited = [commit]
+        visited = []
+
+        while unvisited:
+            current_commit = unvisited.pop(0)
+            visited.append(current_commit.hexsha[:self.build_len])
+            for parent in current_commit.parents:
+                if parent.hexsha[:self.build_len] not in visited:
+                    unvisited.append(parent)
+
+        return visited
+
+    def get_commit_name(self, hexsha8: str) -> str:
+        """Helper method to find a human-readable name for a commit if possible."""
+        if self.repo is None:
+            return hexsha8
+        for h in self.repo.heads:
+            if h.commit.hexsha[:self.build_len] == hexsha8:
+                return h.name
+        for t in self.repo.tags:
+            if t.commit.hexsha[:self.build_len] == hexsha8:
+                return t.name
+        return hexsha8
+
+    def get_commit_hexsha8(self, name: str) -> Optional[str]:
+        """Helper method to search for a commit given a human-readable name."""
+        if self.repo is None:
+            return None
+        for h in self.repo.heads:
+            if h.name == name:
+                return h.commit.hexsha[:self.build_len]
+        for t in self.repo.tags:
+            if t.name == name:
+                return
t.commit.hexsha[:self.build_len] + for c in self.repo.iter_commits("--all"): + if c.hexsha[:self.build_len] == name[:self.build_len]: + return c.hexsha[:self.build_len] + return None + + def builds_timestamp(self, reverse: bool = False) -> Union[Iterator[tuple], Sequence[tuple]]: + """Helper method that gets rows of (build_commit, test_time) sorted by the latter.""" + return [] + + def get_rows(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]: + """ + Helper method that gets table rows for some list of properties. + Rows are created by combining those where all provided properties are equal. + The resulting rows are then grouped by the provided properties and the t/s values are averaged. + The returned rows are unique in terms of property combinations. + """ + return [] + + +class LlamaBenchDataSQLite3(LlamaBenchData): + connection: Optional[sqlite3.Connection] = None + cursor: sqlite3.Cursor + table_name: str + + def __init__(self, tool: str = "llama-bench"): + super().__init__(tool) + if self.connection is None: + self.connection = sqlite3.connect(":memory:") + self.cursor = self.connection.cursor() + + # Set table name and schema based on tool + if self.tool == "llama-bench": + self.table_name = "llama_bench" + db_fields = LLAMA_BENCH_DB_FIELDS + db_types = LLAMA_BENCH_DB_TYPES + elif self.tool == "test-backend-ops": + self.table_name = "test_backend_ops" + db_fields = TEST_BACKEND_OPS_DB_FIELDS + db_types = TEST_BACKEND_OPS_DB_TYPES + else: + assert False + + self.cursor.execute(f"CREATE TABLE {self.table_name}({', '.join(' '.join(x) for x in zip(db_fields, db_types))});") + + def _builds_init(self): + if self.connection: + self.build_len_min = self.cursor.execute(f"SELECT MIN(LENGTH(build_commit)) from {self.table_name};").fetchone()[0] + self.build_len_max = self.cursor.execute(f"SELECT MAX(LENGTH(build_commit)) from {self.table_name};").fetchone()[0] + + if self.build_len_min != self.build_len_max: + logger.warning("Data contains commit hashes of differing lengths. It's possible that the wrong commits will be compared. 
" + "Try purging the the database of old commits.") + self.cursor.execute(f"UPDATE {self.table_name} SET build_commit = SUBSTRING(build_commit, 1, {self.build_len_min});") + + builds = self.cursor.execute(f"SELECT DISTINCT build_commit FROM {self.table_name};").fetchall() + self.builds = list(map(lambda b: b[0], builds)) # list[tuple[str]] -> list[str] + super()._builds_init() + + def builds_timestamp(self, reverse: bool = False) -> Union[Iterator[tuple], Sequence[tuple]]: + data = self.cursor.execute( + f"SELECT build_commit, test_time FROM {self.table_name} ORDER BY test_time;").fetchall() + return reversed(data) if reverse else data + + def get_rows(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]: + if self.tool == "llama-bench": + return self._get_rows_llama_bench(properties, hexsha8_baseline, hexsha8_compare) + elif self.tool == "test-backend-ops": + return self._get_rows_test_backend_ops(properties, hexsha8_baseline, hexsha8_compare) + else: + assert False + + def _get_rows_llama_bench(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]: + select_string = ", ".join( + [f"tb.{p}" for p in properties] + ["tb.n_prompt", "tb.n_gen", "tb.n_depth", "AVG(tb.avg_ts)", "AVG(tc.avg_ts)"]) + equal_string = " AND ".join( + [f"tb.{p} = tc.{p}" for p in LLAMA_BENCH_KEY_PROPERTIES] + [ + f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'"] + ) + group_order_string = ", ".join([f"tb.{p}" for p in properties] + ["tb.n_gen", "tb.n_prompt", "tb.n_depth"]) + query = (f"SELECT {select_string} FROM {self.table_name} tb JOIN {self.table_name} tc ON {equal_string} " + f"GROUP BY {group_order_string} ORDER BY {group_order_string};") + return self.cursor.execute(query).fetchall() + + def _get_rows_test_backend_ops(self, properties: list[str], hexsha8_baseline: str, hexsha8_compare: str) -> Sequence[tuple]: + # For test-backend-ops, we compare FLOPS and bandwidth metrics (prioritizing FLOPS over bandwidth) + select_string = ", ".join( + [f"tb.{p}" for p in properties] + [ + "AVG(tb.flops)", "AVG(tc.flops)", + "AVG(tb.bandwidth_gb_s)", "AVG(tc.bandwidth_gb_s)" + ]) + equal_string = " AND ".join( + [f"tb.{p} = tc.{p}" for p in TEST_BACKEND_OPS_KEY_PROPERTIES] + [ + f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'", + "tb.supported = 1", "tc.supported = 1", "tb.passed = 1", "tc.passed = 1"] # Only compare successful tests + ) + group_order_string = ", ".join([f"tb.{p}" for p in properties]) + query = (f"SELECT {select_string} FROM {self.table_name} tb JOIN {self.table_name} tc ON {equal_string} " + f"GROUP BY {group_order_string} ORDER BY {group_order_string};") + return self.cursor.execute(query).fetchall() + + +class LlamaBenchDataSQLite3File(LlamaBenchDataSQLite3): + def __init__(self, data_file: str, tool: Any): + self.connection = sqlite3.connect(data_file) + self.cursor = self.connection.cursor() + + # Check which table exists in the database + tables = self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall() + table_names = [table[0] for table in tables] + + # Tool selection logic + if tool is None: + if "llama_bench" in table_names: + self.table_name = "llama_bench" + tool = "llama-bench" + elif "test_backend_ops" in table_names: + self.table_name = "test_backend_ops" + tool = "test-backend-ops" + else: + raise RuntimeError(f"No suitable table found in database. 
Available tables: {table_names}") + elif tool == "llama-bench": + if "llama_bench" in table_names: + self.table_name = "llama_bench" + tool = "llama-bench" + else: + raise RuntimeError(f"Table 'test' not found for tool 'llama-bench'. Available tables: {table_names}") + elif tool == "test-backend-ops": + if "test_backend_ops" in table_names: + self.table_name = "test_backend_ops" + tool = "test-backend-ops" + else: + raise RuntimeError(f"Table 'test_backend_ops' not found for tool 'test-backend-ops'. Available tables: {table_names}") + else: + raise RuntimeError(f"Unknown tool: {tool}") + + super().__init__(tool) + self._builds_init() + + @staticmethod + def valid_format(data_file: str) -> bool: + connection = sqlite3.connect(data_file) + cursor = connection.cursor() + + try: + if cursor.execute("PRAGMA schema_version;").fetchone()[0] == 0: + raise sqlite3.DatabaseError("The provided input file does not exist or is empty.") + except sqlite3.DatabaseError as e: + logger.debug(f'"{data_file}" is not a valid SQLite3 file.', exc_info=e) + cursor = None + + connection.close() + return True if cursor else False + + +class LlamaBenchDataJSONL(LlamaBenchDataSQLite3): + def __init__(self, data_file: str, tool: str = "llama-bench"): + super().__init__(tool) + + # Get the appropriate field list based on tool + db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS + + with open(data_file, "r", encoding="utf-8") as fp: + for i, line in enumerate(fp): + parsed = json.loads(line) + + for k in parsed.keys() - set(db_fields): + del parsed[k] + + if (missing_keys := self._check_keys(parsed.keys())): + raise RuntimeError(f"Missing required data key(s) at line {i + 1}: {', '.join(missing_keys)}") + + self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(parsed.keys())}) VALUES({', '.join('?' * len(parsed))});", tuple(parsed.values())) + + self._builds_init() + + @staticmethod + def valid_format(data_file: str) -> bool: + try: + with open(data_file, "r", encoding="utf-8") as fp: + for line in fp: + json.loads(line) + break + except Exception as e: + logger.debug(f'"{data_file}" is not a valid JSONL file.', exc_info=e) + return False + + return True + + +class LlamaBenchDataJSON(LlamaBenchDataSQLite3): + def __init__(self, data_files: list[str], tool: str = "llama-bench"): + super().__init__(tool) + + # Get the appropriate field list based on tool + db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS + + for data_file in data_files: + with open(data_file, "r", encoding="utf-8") as fp: + parsed = json.load(fp) + + for i, entry in enumerate(parsed): + for k in entry.keys() - set(db_fields): + del entry[k] + + if (missing_keys := self._check_keys(entry.keys())): + raise RuntimeError(f"Missing required data key(s) at entry {i + 1}: {', '.join(missing_keys)}") + + self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(entry.keys())}) VALUES({', '.join('?' 
* len(entry))});", tuple(entry.values())) + + self._builds_init() + + @staticmethod + def valid_format(data_files: list[str]) -> bool: + if not data_files: + return False + + for data_file in data_files: + try: + with open(data_file, "r", encoding="utf-8") as fp: + json.load(fp) + except Exception as e: + logger.debug(f'"{data_file}" is not a valid JSON file.', exc_info=e) + return False + + return True + + +class LlamaBenchDataCSV(LlamaBenchDataSQLite3): + def __init__(self, data_files: list[str], tool: str = "llama-bench"): + super().__init__(tool) + + # Get the appropriate field list based on tool + db_fields = LLAMA_BENCH_DB_FIELDS if tool == "llama-bench" else TEST_BACKEND_OPS_DB_FIELDS + + for data_file in data_files: + with open(data_file, "r", encoding="utf-8") as fp: + for i, parsed in enumerate(csv.DictReader(fp)): + keys = set(parsed.keys()) + + for k in keys - set(db_fields): + del parsed[k] + + if (missing_keys := self._check_keys(keys)): + raise RuntimeError(f"Missing required data key(s) at line {i + 1}: {', '.join(missing_keys)}") + + self.cursor.execute(f"INSERT INTO {self.table_name}({', '.join(parsed.keys())}) VALUES({', '.join('?' * len(parsed))});", tuple(parsed.values())) + + self._builds_init() + + @staticmethod + def valid_format(data_files: list[str]) -> bool: + if not data_files: + return False + + for data_file in data_files: + try: + with open(data_file, "r", encoding="utf-8") as fp: + for parsed in csv.DictReader(fp): + break + except Exception as e: + logger.debug(f'"{data_file}" is not a valid CSV file.', exc_info=e) + return False + + return True + + +def format_flops(flops_value: float) -> str: + """Format FLOPS values with appropriate units for better readability.""" + if flops_value == 0: + return "0.00" + + # Define unit thresholds and names + units = [ + (1e12, "T"), # TeraFLOPS + (1e9, "G"), # GigaFLOPS + (1e6, "M"), # MegaFLOPS + (1e3, "k"), # kiloFLOPS + (1, "") # FLOPS + ] + + for threshold, unit in units: + if abs(flops_value) >= threshold: + formatted_value = flops_value / threshold + if formatted_value >= 100: + return f"{formatted_value:.1f}{unit}" + else: + return f"{formatted_value:.2f}{unit}" + + # Fallback for very small values + return f"{flops_value:.2f}" + + +def format_flops_for_table(flops_value: float, target_unit: str) -> str: + """Format FLOPS values for table display without unit suffix (since unit is in header).""" + if flops_value == 0: + return "0.00" + + # Define unit thresholds based on target unit + unit_divisors = { + "TFLOPS": 1e12, + "GFLOPS": 1e9, + "MFLOPS": 1e6, + "kFLOPS": 1e3, + "FLOPS": 1 + } + + divisor = unit_divisors.get(target_unit, 1) + formatted_value = flops_value / divisor + + if formatted_value >= 100: + return f"{formatted_value:.1f}" + else: + return f"{formatted_value:.2f}" + + +def get_flops_unit_name(flops_values: list) -> str: + """Determine the best FLOPS unit name based on the magnitude of values.""" + if not flops_values or all(v == 0 for v in flops_values): + return "FLOPS" + + # Find the maximum absolute value to determine appropriate unit + max_flops = max(abs(v) for v in flops_values if v != 0) + + if max_flops >= 1e12: + return "TFLOPS" + elif max_flops >= 1e9: + return "GFLOPS" + elif max_flops >= 1e6: + return "MFLOPS" + elif max_flops >= 1e3: + return "kFLOPS" + else: + return "FLOPS" + + +bench_data = None +if len(input_file) == 1: + if LlamaBenchDataSQLite3File.valid_format(input_file[0]): + bench_data = LlamaBenchDataSQLite3File(input_file[0], tool) + elif 
LlamaBenchDataJSON.valid_format(input_file):
+        bench_data = LlamaBenchDataJSON(input_file, tool)
+    elif LlamaBenchDataJSONL.valid_format(input_file[0]):
+        bench_data = LlamaBenchDataJSONL(input_file[0], tool)
+    elif LlamaBenchDataCSV.valid_format(input_file):
+        bench_data = LlamaBenchDataCSV(input_file, tool)
+else:
+    if LlamaBenchDataJSON.valid_format(input_file):
+        bench_data = LlamaBenchDataJSON(input_file, tool)
+    elif LlamaBenchDataCSV.valid_format(input_file):
+        bench_data = LlamaBenchDataCSV(input_file, tool)
+
+if not bench_data:
+    raise RuntimeError("No valid (or some invalid) input files found.")
+
+if not bench_data.builds:
+    raise RuntimeError(f"{input_file} does not contain any builds.")
+
+tool = bench_data.tool  # May have chosen a default if tool was None.
+
+
+hexsha8_baseline = name_baseline = None
+
+# If the user specified a baseline, try to find a commit for it:
+if known_args.baseline is not None:
+    if known_args.baseline in bench_data.builds:
+        hexsha8_baseline = known_args.baseline
+    if hexsha8_baseline is None:
+        hexsha8_baseline = bench_data.get_commit_hexsha8(known_args.baseline)
+        name_baseline = known_args.baseline
+    if hexsha8_baseline is None:
+        logger.error(f"cannot find data for baseline={known_args.baseline}.")
+        sys.exit(1)
+# Otherwise, search for the most recent parent of master for which there is data:
+elif bench_data.repo is not None:
+    hexsha8_baseline = bench_data.find_parent_in_data(bench_data.repo.heads.master.commit)
+
+    if hexsha8_baseline is None:
+        logger.error("No baseline was provided and no data was found for any master branch commits.\n")
+        parser.print_help()
+        sys.exit(1)
+else:
+    logger.error("No baseline was provided and the current working directory "
+                 "is not part of a git repository from which a baseline could be inferred.\n")
+    parser.print_help()
+    sys.exit(1)
+
+
+name_baseline = bench_data.get_commit_name(hexsha8_baseline)
+
+hexsha8_compare = name_compare = None
+
+# If the user has specified a compare value, try to find a corresponding commit:
+if known_args.compare is not None:
+    if known_args.compare in bench_data.builds:
+        hexsha8_compare = known_args.compare
+    if hexsha8_compare is None:
+        hexsha8_compare = bench_data.get_commit_hexsha8(known_args.compare)
+        name_compare = known_args.compare
+    if hexsha8_compare is None:
+        logger.error(f"cannot find data for compare={known_args.compare}.")
+        sys.exit(1)
+# Otherwise, search for the commit for which llama-bench was most recently run
+# and that is not a parent of master:
+elif bench_data.repo is not None:
+    hexsha8s_master = bench_data.get_all_parent_hexsha8s(bench_data.repo.heads.master.commit)
+    for (hexsha8, _) in bench_data.builds_timestamp(reverse=True):
+        if hexsha8 not in hexsha8s_master:
+            hexsha8_compare = hexsha8
+            break
+
+    if hexsha8_compare is None:
+        logger.error("No compare target was provided and no data was found for any non-master commits.\n")
+        parser.print_help()
+        sys.exit(1)
+else:
+    logger.error("No compare target was provided and the current working directory "
+                 "is not part of a git repository from which a compare target could be inferred.\n")
+    parser.print_help()
+    sys.exit(1)
+
+name_compare = bench_data.get_commit_name(hexsha8_compare)
+
+# Get tool-specific configuration
+if tool == "llama-bench":
+    key_properties = LLAMA_BENCH_KEY_PROPERTIES
+    bool_properties = LLAMA_BENCH_BOOL_PROPERTIES
+    pretty_names = LLAMA_BENCH_PRETTY_NAMES
+    default_show = DEFAULT_SHOW_LLAMA_BENCH
+    default_hide = DEFAULT_HIDE_LLAMA_BENCH
+elif tool == "test-backend-ops":
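+    # test-backend-ops results are keyed only by backend/op/params/mode (see
+    # TEST_BACKEND_OPS_KEY_PROPERTIES); unlike llama-bench there are no
+    # prompt/generation length columns to special-case.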
key_properties = TEST_BACKEND_OPS_KEY_PROPERTIES + bool_properties = TEST_BACKEND_OPS_BOOL_PROPERTIES + pretty_names = TEST_BACKEND_OPS_PRETTY_NAMES + default_show = DEFAULT_SHOW_TEST_BACKEND_OPS + default_hide = DEFAULT_HIDE_TEST_BACKEND_OPS +else: + assert False + +# If the user provided columns to group the results by, use them: +if known_args.show is not None: + show = known_args.show.split(",") + unknown_cols = [] + for prop in show: + valid_props = key_properties if tool == "test-backend-ops" else key_properties[:-3] # Exclude n_prompt, n_gen, n_depth for llama-bench + if prop not in valid_props: + unknown_cols.append(prop) + if unknown_cols: + logger.error(f"Unknown values for --show: {', '.join(unknown_cols)}") + parser.print_usage() + sys.exit(1) + rows_show = bench_data.get_rows(show, hexsha8_baseline, hexsha8_compare) +# Otherwise, select those columns where the values are not all the same: +else: + rows_full = bench_data.get_rows(key_properties, hexsha8_baseline, hexsha8_compare) + properties_different = [] + + if tool == "llama-bench": + # For llama-bench, skip n_prompt, n_gen, n_depth from differentiation logic + check_properties = [kp for kp in key_properties if kp not in ["n_prompt", "n_gen", "n_depth"]] + for i, kp_i in enumerate(key_properties): + if kp_i in default_show or kp_i in ["n_prompt", "n_gen", "n_depth"]: + continue + for row_full in rows_full: + if row_full[i] != rows_full[0][i]: + properties_different.append(kp_i) + break + elif tool == "test-backend-ops": + # For test-backend-ops, check all key properties + for i, kp_i in enumerate(key_properties): + if kp_i in default_show: + continue + for row_full in rows_full: + if row_full[i] != rows_full[0][i]: + properties_different.append(kp_i) + break + else: + assert False + + show = [] + + if tool == "llama-bench": + # Show CPU and/or GPU by default even if the hardware for all results is the same: + if rows_full and "n_gpu_layers" not in properties_different: + ngl = int(rows_full[0][key_properties.index("n_gpu_layers")]) + + if ngl != 99 and "cpu_info" not in properties_different: + show.append("cpu_info") + + show += properties_different + + index_default = 0 + for prop in ["cpu_info", "gpu_info", "n_gpu_layers", "main_gpu"]: + if prop in show: + index_default += 1 + show = show[:index_default] + default_show + show[index_default:] + elif tool == "test-backend-ops": + show = default_show + properties_different + else: + assert False + + for prop in default_hide: + try: + show.remove(prop) + except ValueError: + pass + + # Add plot_x parameter to parameters to show if it's not already present: + if known_args.plot: + for k, v in pretty_names.items(): + if v == known_args.plot_x and k not in show: + show.append(k) + break + + rows_show = bench_data.get_rows(show, hexsha8_baseline, hexsha8_compare) + +if not rows_show: + logger.error(f"No comparable data was found between {name_baseline} and {name_compare}.\n") + sys.exit(1) + +table = [] +primary_metric = "FLOPS" # Default to FLOPS for test-backend-ops + +if tool == "llama-bench": + # For llama-bench, create test names and compare avg_ts values + for row in rows_show: + n_prompt = int(row[-5]) + n_gen = int(row[-4]) + n_depth = int(row[-3]) + if n_prompt != 0 and n_gen == 0: + test_name = f"pp{n_prompt}" + elif n_prompt == 0 and n_gen != 0: + test_name = f"tg{n_gen}" + else: + test_name = f"pp{n_prompt}+tg{n_gen}" + if n_depth != 0: + test_name = f"{test_name}@d{n_depth}" + # Regular columns test name avg t/s values Speedup + # VVVVVVVVVVVVV VVVVVVVVV 
VVVVVVVVVVVVVV VVVVVVV + table.append(list(row[:-5]) + [test_name] + list(row[-2:]) + [float(row[-1]) / float(row[-2])]) +elif tool == "test-backend-ops": + # Determine the primary metric by checking rows until we find one with valid data + if rows_show: + primary_metric = "FLOPS" # Default to FLOPS + flops_values = [] + + # Collect all FLOPS values to determine the best unit + for sample_row in rows_show: + baseline_flops = float(sample_row[-4]) + compare_flops = float(sample_row[-3]) + baseline_bandwidth = float(sample_row[-2]) + + if baseline_flops > 0: + flops_values.extend([baseline_flops, compare_flops]) + elif baseline_bandwidth > 0 and not flops_values: + primary_metric = "Bandwidth (GB/s)" + + # If we have FLOPS data, determine the appropriate unit + if flops_values: + primary_metric = get_flops_unit_name(flops_values) + + # For test-backend-ops, prioritize FLOPS > bandwidth for comparison + for row in rows_show: + # Extract metrics: flops, bandwidth_gb_s (baseline and compare) + baseline_flops = float(row[-4]) + compare_flops = float(row[-3]) + baseline_bandwidth = float(row[-2]) + compare_bandwidth = float(row[-1]) + + # Determine which metric to use for comparison (prioritize FLOPS > bandwidth) + if baseline_flops > 0 and compare_flops > 0: + # Use FLOPS comparison (higher is better) + speedup = compare_flops / baseline_flops + baseline_str = format_flops_for_table(baseline_flops, primary_metric) + compare_str = format_flops_for_table(compare_flops, primary_metric) + elif baseline_bandwidth > 0 and compare_bandwidth > 0: + # Use bandwidth comparison (higher is better) + speedup = compare_bandwidth / baseline_bandwidth + baseline_str = f"{baseline_bandwidth:.2f}" + compare_str = f"{compare_bandwidth:.2f}" + else: + # Fallback if no valid data is available + baseline_str = "N/A" + compare_str = "N/A" + from math import nan + speedup = nan + + table.append(list(row[:-4]) + [baseline_str, compare_str, speedup]) +else: + assert False + +# Some a-posteriori fixes to make the table contents prettier: +for bool_property in bool_properties: + if bool_property in show: + ip = show.index(bool_property) + for row_table in table: + row_table[ip] = "Yes" if int(row_table[ip]) == 1 else "No" + +if tool == "llama-bench": + if "model_type" in show: + ip = show.index("model_type") + for (old, new) in MODEL_SUFFIX_REPLACE.items(): + for row_table in table: + row_table[ip] = row_table[ip].replace(old, new) + + if "model_size" in show: + ip = show.index("model_size") + for row_table in table: + row_table[ip] = float(row_table[ip]) / 1024 ** 3 + + if "gpu_info" in show: + ip = show.index("gpu_info") + for row_table in table: + for gns in GPU_NAME_STRIP: + row_table[ip] = row_table[ip].replace(gns, "") + + gpu_names = row_table[ip].split(", ") + num_gpus = len(gpu_names) + all_names_the_same = len(set(gpu_names)) == 1 + if len(gpu_names) >= 2 and all_names_the_same: + row_table[ip] = f"{num_gpus}x {gpu_names[0]}" + +headers = [pretty_names.get(p, p) for p in show] +if tool == "llama-bench": + headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"] +elif tool == "test-backend-ops": + headers += [f"{primary_metric} {name_baseline}", f"{primary_metric} {name_compare}", "Speedup"] +else: + assert False + +if known_args.plot: + def create_performance_plot(table_data: list[list[str]], headers: list[str], baseline_name: str, compare_name: str, output_file: str, plot_x_param: str, log_scale: bool = False, tool_type: str = "llama-bench", metric_name: str = "t/s"): + try: + import 
matplotlib + import matplotlib.pyplot as plt + matplotlib.use('Agg') + except ImportError as e: + logger.error("matplotlib is required for --plot.") + raise e + + data_headers = headers[:-4] # Exclude the last 4 columns (Test, baseline t/s, compare t/s, Speedup) + plot_x_index = None + plot_x_label = plot_x_param + + if plot_x_param not in ["n_prompt", "n_gen", "n_depth"]: + pretty_name = LLAMA_BENCH_PRETTY_NAMES.get(plot_x_param, plot_x_param) + if pretty_name in data_headers: + plot_x_index = data_headers.index(pretty_name) + plot_x_label = pretty_name + elif plot_x_param in data_headers: + plot_x_index = data_headers.index(plot_x_param) + plot_x_label = plot_x_param + else: + logger.error(f"Parameter '{plot_x_param}' not found in current table columns. Available columns: {', '.join(data_headers)}") + return + + grouped_data = {} + + for i, row in enumerate(table_data): + group_key_parts = [] + test_name = row[-4] + + base_test = "" + x_value = None + + if plot_x_param in ["n_prompt", "n_gen", "n_depth"]: + for j, val in enumerate(row[:-4]): + header_name = data_headers[j] + if val is not None and str(val).strip(): + group_key_parts.append(f"{header_name}={val}") + + if plot_x_param == "n_prompt" and "pp" in test_name: + base_test = test_name.split("@")[0] + x_value = base_test + elif plot_x_param == "n_gen" and "tg" in test_name: + x_value = test_name.split("@")[0] + elif plot_x_param == "n_depth" and "@d" in test_name: + base_test = test_name.split("@d")[0] + x_value = int(test_name.split("@d")[1]) + else: + base_test = test_name + + if base_test.strip(): + group_key_parts.append(f"Test={base_test}") + else: + for j, val in enumerate(row[:-4]): + if j != plot_x_index: + header_name = data_headers[j] + if val is not None and str(val).strip(): + group_key_parts.append(f"{header_name}={val}") + else: + x_value = val + + group_key_parts.append(f"Test={test_name}") + + group_key = tuple(group_key_parts) + + if group_key not in grouped_data: + grouped_data[group_key] = [] + + grouped_data[group_key].append({ + 'x_value': x_value, + 'baseline': float(row[-3]), + 'compare': float(row[-2]), + 'speedup': float(row[-1]) + }) + + if not grouped_data: + logger.error("No data available for plotting") + return + + def make_axes(num_groups, max_cols=2, base_size=(8, 4)): + from math import ceil + cols = 1 if num_groups == 1 else min(max_cols, num_groups) + rows = ceil(num_groups / cols) + + # Scale figure size by grid dimensions + w, h = base_size + fig, ax_arr = plt.subplots(rows, cols, + figsize=(w * cols, h * rows), + squeeze=False) + + axes = ax_arr.flatten()[:num_groups] + return fig, axes + + num_groups = len(grouped_data) + fig, axes = make_axes(num_groups) + + plot_idx = 0 + + for group_key, points in grouped_data.items(): + if plot_idx >= len(axes): + break + ax = axes[plot_idx] + + try: + points_sorted = sorted(points, key=lambda p: float(p['x_value']) if p['x_value'] is not None else 0) + x_values = [float(p['x_value']) if p['x_value'] is not None else 0 for p in points_sorted] + except ValueError: + points_sorted = sorted(points, key=lambda p: group_key) + x_values = [p['x_value'] for p in points_sorted] + + baseline_vals = [p['baseline'] for p in points_sorted] + compare_vals = [p['compare'] for p in points_sorted] + + ax.plot(x_values, baseline_vals, 'o-', color='skyblue', + label=f'{baseline_name}', linewidth=2, markersize=6) + ax.plot(x_values, compare_vals, 's--', color='lightcoral', alpha=0.8, + label=f'{compare_name}', linewidth=2, markersize=6) + + if log_scale: + 
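+                # A base-2 log axis suits the power-of-two sweeps (batch sizes,
+                # depths) that llama-bench runs typically use.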
ax.set_xscale('log', base=2) + unique_x = sorted(set(x_values)) + ax.set_xticks(unique_x) + ax.set_xticklabels([str(int(x)) for x in unique_x]) + + title_parts = [] + for part in group_key: + if '=' in part: + key, value = part.split('=', 1) + title_parts.append(f"{key}: {value}") + + title = ', '.join(title_parts) if title_parts else "Performance comparison" + + # Determine y-axis label based on tool type + if tool_type == "llama-bench": + y_label = "Tokens per second (t/s)" + elif tool_type == "test-backend-ops": + y_label = metric_name + else: + assert False + + ax.set_xlabel(plot_x_label, fontsize=12, fontweight='bold') + ax.set_ylabel(y_label, fontsize=12, fontweight='bold') + ax.set_title(title, fontsize=12, fontweight='bold') + ax.legend(loc='best', fontsize=10) + ax.grid(True, alpha=0.3) + + plot_idx += 1 + + for i in range(plot_idx, len(axes)): + axes[i].set_visible(False) + + fig.suptitle(f'Performance comparison: {compare_name} vs. {baseline_name}', + fontsize=14, fontweight='bold') + fig.subplots_adjust(top=1) + + plt.tight_layout() + plt.savefig(output_file, dpi=300, bbox_inches='tight') + plt.close() + + create_performance_plot(table, headers, name_baseline, name_compare, known_args.plot, known_args.plot_x, known_args.plot_log_scale, tool, primary_metric) + +print(tabulate( # noqa: NP100 + table, + headers=headers, + floatfmt=".2f", + tablefmt=known_args.output +)) diff --git a/llama.cpp/scripts/compare-logprobs.py b/llama.cpp/scripts/compare-logprobs.py new file mode 100644 index 0000000..63861dd --- /dev/null +++ b/llama.cpp/scripts/compare-logprobs.py @@ -0,0 +1,281 @@ +import argparse +import requests +import json +from pathlib import Path +import logging + +logger = logging.getLogger("compare-logprobs") +logging.basicConfig(level=logging.INFO) + + +DESCRIPTION = """ +Compare logits between llama.cpp and another inference engine using OpenAI-compatible server endpoints. + +Unlike compare-logits.py, it allows dumping logits from a hosted API endpoint. Useful when it's not possible to run both models locally. + +Example usage: + Step 1: Dump logits from two different servers + python scripts/compare-logprobs.py dump logits_llama.log http://localhost:8080/v1/completions + python scripts/compare-logprobs.py dump logits_other.log http://other-engine:8000/v1/completions + + (optionally, you can add --api-key <key> if the endpoint requires authentication) + + Step 2: Compare the dumped logits + python scripts/compare-logprobs.py compare logits_llama.log logits_other.log report.md +""" + + +def generate_input_prompt(length: int) -> list[str]: + CORPUS = """ + You are an advanced AI assistant capable of using tools to gather information, perform calculations, or execute tasks. Always think step by step before responding. If a user's query requires external data, computation, or actions beyond your internal knowledge, use the appropriate tools via function calls. + + ### Tool Call Format: + When you need to use a tool, output the call in this exact XML format. Include the opening and closing tags. Do not escape arguments; they will be parsed as plain text. + + You can make multiple calls in one go by placing them one after another. 
+ """ + words = [w.strip() for w in CORPUS.strip().split(" ")] + words = [w for w in words if len(w) > 0] # filter out empty strings + while len(words) < length: + words += words + return words[:length] + + +def dump_logits( + endpoint: str, + output_path: Path, + input_words: list[str], + pattern: list[tuple[bool, int]], + api_key=None, +): + logger.info(f"Dumping logits to {output_path} from endpoint {endpoint}...") + words = input_words + curr_text = "" + n_total = sum(n for get, n in pattern if get) + n_done = 0 + i_cur = 0 + i_total = len(words) + with output_path.open("w") as f: + for get, n in pattern: + if not get: + # skip n words + for i in range(n): + curr_text += words.pop(0) + " " + i_cur += 1 + continue + # get n words + for i in range(n): + curr_text += words.pop(0) + " " + payload = { + "prompt": curr_text.strip(), + "temperature": 0.0, + "top_k": 1, + "max_tokens": 1, + "logprobs": 1, + "stream": False, + } + response = requests.post( + endpoint, + json=payload, + headers={"Authorization": f"Bearer {api_key}"} if api_key else {}, + ) + response.raise_for_status() + data = response.json() + data["__index"] = i_cur # add index for easier debugging later + data = json.dumps(data) + f.write(f"{data}\n") + n_done += 1 + i_cur += 1 + logger.info( + f"\n\n{data}\n\n[Step: {n_done}/{n_total} | Word: {i_cur}/{i_total}]" + ) + logger.info(f"Logits dumped to {output_path}") + + +def get_token_logprobs(data: dict): + logprobs = data["choices"][0]["logprobs"] + if "content" in logprobs: + # llama.cpp case + top = logprobs["content"][0]["top_logprobs"][0] + return top["token"], top["logprob"] + else: + # vllm case + tokens = logprobs["tokens"] + token_logprobs = logprobs["token_logprobs"] + return tokens[0], token_logprobs[0] + + +def clean_text(text: str) -> str: + return ( + "'" + + text.replace("\n", "\\n") + .replace("\t", "\\t") + .replace("\r", "\\r") + .replace("|", "\\|") + + "'" + ) + + +def compare_logits(input1: Path, input2: Path, output_path: Path): + with input1.open("r") as f1, input2.open("r") as f2, output_path.open("w") as fout: + lines1 = f1.readlines() + lines2 = f2.readlines() + + tab_header = [ + "idx", + input1.name, + "logprob_1", + input2.name, + "logprob_2", + "diff (abs)", + ] + tab_entries = [] + tab_max_widths = [len(h) for h in tab_header] + + assert len(lines1) == len( + lines2 + ), "Input files must have the same number of lines." 
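+        # Both dumps are produced with the same --pattern, so line i in each file
+        # refers to the same position in the prompt; __index is cross-checked below.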
+ + fout.write("# Logits Comparison Report\n\n") + for i, (line1, line2) in enumerate(zip(lines1, lines2)): + if not line1.strip() or not line2.strip(): + continue # skip empty lines + + data1 = json.loads(line1) + data2 = json.loads(line2) + + idx1 = data1.get("__index", -1) + idx2 = data2.get("__index", -1) + if idx1 != idx2: + logger.warning( + f"Warning: Mismatched indices at line {i}: {idx1} vs {idx2}" + ) + + token1, logprob1 = get_token_logprobs(data1) + token2, logprob2 = get_token_logprobs(data2) + + token1 = clean_text(token1) + token2 = clean_text(token2) + abs_diff = abs(logprob1 - logprob2) + + tab_entries.append( + ( + str(idx1 + 1), + token1, + f"{logprob1:.4f}", + token2, + f"{logprob2:.4f}", + f"{(abs_diff):.4f}", + ) + ) + + for i in range(len(tab_entries)): + for j in range(len(tab_header)): + tab_max_widths[j] = max(tab_max_widths[j], len(tab_entries[i][j])) + + output = "" + for j in range(len(tab_header)): + output += f"| {tab_header[j]:<{tab_max_widths[j]}} " + output += "|\n" + for j in range(len(tab_header)): + output += f"|{'-' * (tab_max_widths[j] + 2)}" + output += "|\n" + for entry in tab_entries: + for j in range(len(tab_header)): + output += f"| {entry[j]:<{tab_max_widths[j]}} " + output += "|\n" + + logger.info("\n" + output) + fout.write(output) + logger.info(f"Report written to {output_path}") + + +def parse_pattern(pattern: str) -> list[tuple[bool, int]]: + parts = pattern.split(",") + result = [] + for i, part in enumerate(parts): + n = int(part) + if i % 2 == 0: + result.append((True, n)) # get n words + else: + result.append((False, n)) # skip n words + return result + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter + ) + subparsers = parser.add_subparsers( + dest="verb", required=True, help="action to perform" + ) + + # dump subcommand + parser_dump = subparsers.add_parser("dump", help="dump logits from an endpoint") + parser_dump.add_argument( + "output", type=Path, help="output path for dumped logits (.log)" + ) + parser_dump.add_argument( + "endpoint", type=str, help="OAI-compat /completions endpoint" + ) + parser_dump.add_argument( + "--api-key", + type=str, + default=None, + help="API key for authentication (if required)", + ) + parser_dump.add_argument( + "--file", + type=Path, + default=None, + help="File containing prompt to use instead of the default", + ) + parser_dump.add_argument( + "--pattern", + type=str, + default="10,1000,10,4000,10", + help="Pattern n_get,n_skip,... 
where n_get is the number of words to get and n_skip is the number of words to skip (counted in words, NOT tokens)",
+    )
+
+    # compare subcommand
+    parser_compare = subparsers.add_parser(
+        "compare", help="compare two dumped logits files"
+    )
+    parser_compare.add_argument("input1", type=Path, help="first input file (.log)")
+    parser_compare.add_argument("input2", type=Path, help="second input file (.log)")
+    parser_compare.add_argument(
+        "output", type=Path, help="output path for comparison report (.md)"
+    )
+
+    try:
+        return parser.parse_args()
+    except Exception as e:
+        parser.print_help()
+        raise e
+
+
+def main():
+    args = parse_args()
+
+    if args.verb == "dump":
+        pattern = parse_pattern(args.pattern)
+        input_length = sum(n for _, n in pattern)
+        input_words = generate_input_prompt(input_length)
+        if args.file is not None:
+            with args.file.open("r") as f:
+                input_words = f.read().strip().split(" ")
+            if len(input_words) < input_length:
+                raise ValueError(
+                    f"Input file has only {len(input_words)} words, but pattern requires at least {input_length} words."
+                )
+            input_length = len(input_words)
+        logger.info(f"Using {input_length} words")
+        dump_logits(args.endpoint, args.output, input_words, pattern, args.api_key)
+    elif args.verb == "compare":
+        compare_logits(args.input1, args.input2, args.output)
+    else:
+        raise ValueError(f"Unknown verb: {args.verb}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llama.cpp/scripts/create_ops_docs.py b/llama.cpp/scripts/create_ops_docs.py
new file mode 100755
index 0000000..e3a476a
--- /dev/null
+++ b/llama.cpp/scripts/create_ops_docs.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+"""
+This script parses docs/ops/*.csv and creates docs/ops.md, which is a table documenting supported operations on various ggml backends.
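+
+Usage (run from the repository root; the output filename is optional and
+defaults to ops.md):
+
+    ./scripts/create_ops_docs.py [output_filename]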
+""" +import csv +import logging +import sys +from pathlib import Path +from collections import defaultdict + + +class DocsGenerator: + def __init__(self, ggml_root: str, output_filename: str = "ops.md"): + self.ggml_root = Path(ggml_root) + self.ops_dir = self.ggml_root / "docs" / "ops" + self.output_filename = output_filename + self.backend_support: dict[str, dict[str, list[bool]]] = defaultdict( + lambda: defaultdict(list) + ) + self.all_operations: set[str] = set() + self.all_backends: set[str] = set() + self.logger = logging.getLogger(__name__) + + def parse_support_files(self) -> None: + if not self.ops_dir.exists(): + self.logger.warning(f"ops directory not found: {self.ops_dir}") + return + + self.logger.info(f"Parsing support files from {self.ops_dir}...") + + for support_file in self.ops_dir.glob("*.csv"): + self.logger.info(f" Reading: {support_file.name}") + self._parse_support_file(support_file) + + def _parse_support_file(self, file_path: Path) -> None: + try: + with open(file_path, "r", newline='') as f: + reader = csv.DictReader(f) + + for row in reader: + # Skip rows that don't have support mode + if row.get('test_mode') != 'support': + continue + + backend_name = row.get('backend_name', '').strip() + operation = row.get('op_name', '').strip() + supported_str = row.get('error_message', '').strip() # "yes" or "no" + backend_reg_name = row.get('backend_reg_name', '').strip() + + # Skip invalid or error operations + if not operation or not backend_name or operation in [ + "CONTEXT_ERROR", + "BUILD_ERROR", + ]: + continue + + is_supported = supported_str.lower() == "yes" + + # Use backend_reg_name for grouping, fallback to backend_name + backend_key = backend_reg_name if backend_reg_name else backend_name + + self.all_backends.add(backend_key) + self.backend_support[backend_key][operation].append(is_supported) + self.all_operations.add(operation) + + except Exception as e: + self.logger.error(f" Error parsing {file_path}: {e}") + + def get_backend_support_status(self, backend: str, operation: str) -> str: + support_list = self.backend_support[backend].get(operation, []) + + if not support_list: + return "unsupported" + + all_supported = all(support_list) + any_supported = any(support_list) + + if all_supported: + return "supported" + elif any_supported: + return "partially supported" + else: + return "unsupported" + + def get_support_status(self, operation: str) -> str: + if operation not in self.all_operations: + return "unsupported" + + support_count = 0 + total_backends = len(self.all_backends) + + for backend in self.all_backends: + if self.backend_support[backend].get(operation, False): + support_count += 1 + + if support_count == 0: + return "unsupported" + elif support_count == total_backends: + return "supported" + else: + return "partially supported" + + def get_support_symbol(self, status: str) -> str: + symbols = {"supported": "✅", "partially supported": "🟡", "unsupported": "❌"} + return symbols.get(status, "❓") + + def generate_markdown(self) -> str: + lines = [] + + lines.append("# GGML Operations") + lines.append("") + lines.append("List of GGML operations and backend support status.") + lines.append("") + lines.append("## How to add a backend to this table:") + lines.append("") + lines.append("1. Run `test-backend-ops support --output csv` with your backend name and redirect output to a csv file in `docs/ops/` (e.g., `docs/ops/CUDA.csv`)") + lines.append("2. 
Regenerate `/docs/ops.md` via `./scripts/create_ops_docs.py`") + lines.append("") + lines.append("Legend:") + lines.append("- ✅ Fully supported by this backend") + lines.append("- 🟡 Partially supported by this backend") + lines.append("- ❌ Not supported by this backend") + lines.append("") + + backends = sorted(self.all_backends) + header = "| Operation |" + for backend in backends: + header += f" {backend} |" + + separator = "|-----------|" + for _ in backends: + separator += "------|" + + lines.append(header) + lines.append(separator) + + sorted_operations = sorted(self.all_operations) + + for operation in sorted_operations: + row = f"| {operation:>32} |" + + for backend in backends: + status = self.get_backend_support_status(backend, operation) + if status == "supported": + symbol = "✅" + elif status == "partially supported": + symbol = "🟡" + else: + symbol = "❌" + row += f" {symbol} |" + + lines.append(row) + + lines.append("") + + return "\n".join(lines) + + def run(self) -> None: + self.logger.info("Parsing GGML operation support files...") + self.parse_support_files() + + if not self.all_operations: + self.logger.error( + "No operations found. Make sure to run test-backend-ops support --output csv > docs/ops/file.csv first." + ) + return + + self.logger.info( + f"Found {len(self.all_operations)} operations across {len(self.all_backends)} backends" + ) + + self.logger.info("Generating markdown...") + markdown_content = self.generate_markdown() + + docs_dir = self.ggml_root / "docs" + docs_dir.mkdir(exist_ok=True) + + ops_file = docs_dir / self.output_filename + with open(ops_file, "w") as f: + f.write(markdown_content) + + self.logger.info(f"Generated: {ops_file}") + self.logger.info(f"Operations: {len(self.all_operations)}") + self.logger.info(f"Backends: {len(self.all_backends)}") + + +def main(): + logging.basicConfig(level=logging.INFO) + + if len(sys.argv) > 1: + output_filename = sys.argv[1] + else: + output_filename = "ops.md" + + generator = DocsGenerator(".", output_filename) + generator.run() + + +if __name__ == "__main__": + main() diff --git a/llama.cpp/scripts/debug-test.sh b/llama.cpp/scripts/debug-test.sh new file mode 100755 index 0000000..ead7ea1 --- /dev/null +++ b/llama.cpp/scripts/debug-test.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash + +PROG=${0##*/} +build_dir="build-ci-debug" + +# Print Color Commands +red=$(tput setaf 1) +green=$(tput setaf 2) +yellow=$(tput setaf 3) +blue=$(tput setaf 4) +magenta=$(tput setaf 5) +cyan=$(tput setaf 6) +normal=$(tput sgr0) + + +# Print Help Message +#################### + +print_full_help() { + cat << EOF +Usage: $PROG [OPTION]... <test_regex> (test_number) +Debug specific ctest program. + +Options: + -h, --help display this help and exit + -g run in gdb mode + +Arguments: + <test_regex> (Mandatory) Supply one regex to the script to filter tests + (test_number) (Optional) Test number to run a specific test + +Example: + $PROG test-tokenizer + $PROG test-tokenizer 3 +EOF +} + +abort() { + echo "Error: $1" >&2 + cat << EOF >&2 +Usage: $PROG [OPTION]... <test_regex> (test_number) +Debug specific ctest program. +Refer to --help for full instructions. +EOF + exit 1 +} + + +# Dependency Sanity Check +######################### + +check_dependency() { + command -v "$1" >/dev/null 2>&1 || { + abort "$1 is required but not found. Please install it and try again." 
+ } +} + +check_dependency ctest +check_dependency cmake + + +# Step 0: Check the args +######################## + +if [ x"$1" = x"-h" ] || [ x"$1" = x"--help" ]; then + print_full_help >&2 + exit 0 +fi + +# Parse command-line options +gdb_mode=false +while getopts "g" opt; do + case $opt in + g) + gdb_mode=true + echo "gdb_mode Mode Enabled" + ;; + esac +done + +# Shift the option parameters +shift $((OPTIND - 1)) + +# Positionial Argument Processing : <test_regex> +if [ -z "${1}" ]; then + abort "Test regex is required" +else + test_suite=${1:-} +fi + +# Positionial Argument Processing : (test_number) +test_number=${2:-} + + +# Step 1: Reset and Setup folder context +######################################## + +## Sanity check that we are actually in a git repo +repo_root=$(git rev-parse --show-toplevel) +if [ ! -d "$repo_root" ]; then + abort "Not in a Git repository." +fi + +## Reset folder to root context of git repo and Create and enter build directory +pushd "$repo_root" +rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir" + + +# Step 2: Setup Build Environment and Compile Test Binaries +########################################################### + +cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 || abort "Failed to build environment" +pushd "$build_dir" +make -j || abort "Failed to compile" +popd > /dev/null || exit 1 + + +# Step 3: Find all tests available that matches REGEX +#################################################### + +# Ctest Gather Tests +# `-R test-tokenizer` : looks for all the test files named `test-tokenizer*` (R=Regex) +# `-N` : "show-only" disables test execution & shows test commands that you can feed to GDB. +# `-V` : Verbose Mode +printf "\n\nGathering tests that fit REGEX: ${test_suite} ...\n" +pushd "$build_dir" +tests=($(ctest -R ${test_suite} -V -N | grep -E " +Test +#[0-9]+*" | cut -d':' -f2 | awk '{$1=$1};1')) +if [ ${#tests[@]} -eq 0 ]; then + abort "No tests available... check your compilation process..." +fi +popd > /dev/null || exit 1 + + +# Step 4: Identify Test Command for Debugging +############################################# + +# Select test number +if [ -z $test_number ]; then + # List out available tests + printf "Which test would you like to debug?\n" + id=0 + for s in "${tests[@]}" + do + echo "Test# ${id}" + echo " $s" + ((id++)) + done + + # Prompt user which test they wanted to run + printf "\nRun test#? " + read test_number + +else + printf "\nUser Already Requested #${test_number}\n" + +fi + +# Grab all tests commands +pushd "$build_dir" +sIFS=$IFS # Save Initial IFS (Internal Field Separator) +IFS=$'\n' # Change IFS (Internal Field Separator) (So we split ctest output by newline rather than by spaces) +test_args=($(ctest -R ${test_suite} -V -N | grep "Test command" | cut -d':' -f3 | awk '{$1=$1};1' )) # Get test args +IFS=$sIFS # Reset IFS (Internal Field Separator) +popd > /dev/null || exit 1 + +# Grab specific test command +single_test_name="${tests[test_number]}" +single_test_command="${test_args[test_number]}" + + +# Step 5: Execute or GDB Debug +############################## + +printf "${magenta}Running Test #${test_number}: ${single_test_name}${normal}\n" +printf "${cyan}single_test_command: ${single_test_command}${normal}\n" + +if [ "$gdb_mode" = "true" ]; then + # Execute debugger + pushd "$repo_root" || exit 1 + eval "gdb --args ${single_test_command}" + popd > /dev/null || exit 1 + +else + # Execute Test + pushd "$repo_root" || exit 1 + eval "${single_test_command}" + exit_code=$? 
+ popd > /dev/null || exit 1 + + # Print Result + printf "${blue}Ran Test #${test_number}: ${single_test_name}${normal}\n" + printf "${yellow}Command: ${single_test_command}${normal}\n" + if [ $exit_code -eq 0 ]; then + printf "${green}TEST PASS${normal}\n" + else + printf "${red}TEST FAIL${normal}\n" + fi + +fi + +# Return to the directory from which the user ran the command. +popd > /dev/null || exit 1 diff --git a/llama.cpp/scripts/fetch_server_test_models.py b/llama.cpp/scripts/fetch_server_test_models.py new file mode 100755 index 0000000..ac483ef --- /dev/null +++ b/llama.cpp/scripts/fetch_server_test_models.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +''' + This script fetches all the models used in the server tests. + + This is useful for slow tests that use larger models, to avoid them timing out on the model downloads. + + It is meant to be run from the root of the repository. + + Example: + python scripts/fetch_server_test_models.py + ( cd tools/server/tests && ./tests.sh -v -x -m slow ) +''' +import ast +import glob +import logging +import os +from typing import Generator +from pydantic import BaseModel +from typing import Optional +import subprocess + + +class HuggingFaceModel(BaseModel): + hf_repo: str + hf_file: Optional[str] = None + + class Config: + frozen = True + + +def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, None, None]: + try: + with open(test_file) as f: + tree = ast.parse(f.read()) + except Exception as e: + logging.error(f'collect_hf_model_test_parameters failed on {test_file}: {e}') + return + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + for dec in node.decorator_list: + if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize': + param_names = ast.literal_eval(dec.args[0]).split(",") + if "hf_repo" not in param_names: + continue + + raw_param_values = dec.args[1] + if not isinstance(raw_param_values, ast.List): + logging.warning(f'Skipping non-list parametrize entry at {test_file}:{node.lineno}') + continue + + hf_repo_idx = param_names.index("hf_repo") + hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None + + for t in raw_param_values.elts: + if not isinstance(t, ast.Tuple): + logging.warning(f'Skipping non-tuple parametrize entry at {test_file}:{node.lineno}') + continue + yield HuggingFaceModel( + hf_repo=ast.literal_eval(t.elts[hf_repo_idx]), + hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + models = sorted(list(set([ + model + for test_file in glob.glob('tools/server/tests/unit/test_*.py') + for model in collect_hf_model_test_parameters(test_file) + ])), key=lambda m: (m.hf_repo, m.hf_file)) + + logging.info(f'Found {len(models)} models in parameterized tests:') + for m in models: + logging.info(f' - {m.hf_repo} / {m.hf_file}') + + cli_path = os.environ.get( + 'LLAMA_CLI_BIN_PATH', + os.path.join( + os.path.dirname(__file__), + '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli')) + + for m in models: + if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file): + continue + if m.hf_file is not None and '-of-' in m.hf_file: + logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file') + continue + logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched') + cmd = [ + cli_path, 
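+            # minimal generation settings: a single token from a fixed prompt is
+            # enough to trigger the model download without running a real completion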
+            '-hfr', m.hf_repo,
+            *([] if m.hf_file is None else ['-hff', m.hf_file]),
+            '-n', '1',
+            '-p', 'Hey',
+            '--no-warmup',
+            '--log-disable',
+            '-no-cnv']
+        if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
+            cmd.append('-fa')
+        try:
+            subprocess.check_call(cmd)
+        except subprocess.CalledProcessError:
+            logging.error(f'Failed to fetch model at {m.hf_repo} / {m.hf_file} with command:\n  {" ".join(cmd)}')
+            exit(1)
diff --git a/llama.cpp/scripts/gen-authors.sh b/llama.cpp/scripts/gen-authors.sh
new file mode 100755
index 0000000..73e7b38
--- /dev/null
+++ b/llama.cpp/scripts/gen-authors.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+printf "# date: $(date)\n" > AUTHORS
+printf "# this file is auto-generated by scripts/gen-authors.sh\n\n" >> AUTHORS
+
+git log --format='%an <%ae>' --reverse --date=short master | awk '!seen[$0]++' | sort >> AUTHORS
+
+# if necessary, update your name here, for example: jdoe -> John Doe
+sed -i '' 's/^jdoe/John Doe/g' AUTHORS
diff --git a/llama.cpp/scripts/gen-unicode-data.py b/llama.cpp/scripts/gen-unicode-data.py
new file mode 100644
index 0000000..2d9bde0
--- /dev/null
+++ b/llama.cpp/scripts/gen-unicode-data.py
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+import array
+import unicodedata
+import requests
+
+
+MAX_CODEPOINTS = 0x110000
+
+UNICODE_DATA_URL = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
+
+
+# see https://www.unicode.org/L2/L1999/UnicodeData.html
+def unicode_data_iter():
+    res = requests.get(UNICODE_DATA_URL)
+    res.raise_for_status()
+    data = res.content.decode()
+
+    prev = []
+
+    for line in data.splitlines():
+        # e.g.: 0000;<control>;Cc;0;BN;;;;;N;NULL;;;;
+        line = line.split(";")
+
+        cpt = int(line[0], base=16)
+        assert cpt < MAX_CODEPOINTS
+
+        cpt_lower = int(line[-2] or "0", base=16)
+        assert cpt_lower < MAX_CODEPOINTS
+
+        cpt_upper = int(line[-3] or "0", base=16)
+        assert cpt_upper < MAX_CODEPOINTS
+
+        categ = line[2].strip()
+        assert len(categ) == 2
+
+        bidir = line[4].strip()
+        assert 1 <= len(bidir) <= 3  # Bidi_Class codes are 1-3 characters (e.g. L, BN, NSM)
+
+        name = line[1]
+        if name.endswith(", First>"):
+            prev = (cpt, cpt_lower, cpt_upper, categ, bidir)
+            continue
+        if name.endswith(", Last>"):
+            assert prev[1:] == (0, 0, categ, bidir)
+            for c in range(prev[0], cpt):
+                yield (c, cpt_lower, cpt_upper, categ, bidir)
+
+        yield (cpt, cpt_lower, cpt_upper, categ, bidir)
+
+
+# see definition in unicode.h
+CODEPOINT_FLAG_UNDEFINED = 0x0001 #
+CODEPOINT_FLAG_NUMBER = 0x0002 # \p{N}
+CODEPOINT_FLAG_LETTER = 0x0004 # \p{L}
+CODEPOINT_FLAG_SEPARATOR = 0x0008 # \p{Z}
+CODEPOINT_FLAG_MARK = 0x0010 # \p{M}
+CODEPOINT_FLAG_PUNCTUATION = 0x0020 # \p{P}
+CODEPOINT_FLAG_SYMBOL = 0x0040 # \p{S}
+CODEPOINT_FLAG_CONTROL = 0x0080 # \p{C}
+
+UNICODE_CATEGORY_TO_FLAG = {
+    "Cn": CODEPOINT_FLAG_UNDEFINED, # Undefined
+    "Cc": CODEPOINT_FLAG_CONTROL, # Control
+    "Cf": CODEPOINT_FLAG_CONTROL, # Format
+    "Co": CODEPOINT_FLAG_CONTROL, # Private Use
+    "Cs": CODEPOINT_FLAG_CONTROL, # Surrogate
+    "Ll": CODEPOINT_FLAG_LETTER, # Lowercase Letter
+    "Lm": CODEPOINT_FLAG_LETTER, # Modifier Letter
+    "Lo": CODEPOINT_FLAG_LETTER, # Other Letter
+    "Lt": CODEPOINT_FLAG_LETTER, # Titlecase Letter
+    "Lu": CODEPOINT_FLAG_LETTER, # Uppercase Letter
+    "L&": CODEPOINT_FLAG_LETTER, # Cased Letter
+    "Mc": CODEPOINT_FLAG_MARK, # Spacing Mark
+    "Me": CODEPOINT_FLAG_MARK, # Enclosing Mark
+    "Mn": CODEPOINT_FLAG_MARK, # Nonspacing Mark
+    "Nd": CODEPOINT_FLAG_NUMBER, # Decimal Number
+    "Nl": CODEPOINT_FLAG_NUMBER, # Letter Number
+    "No": CODEPOINT_FLAG_NUMBER, # Other Number
+    "Pc": 
CODEPOINT_FLAG_PUNCTUATION, # Connector Punctuation + "Pd": CODEPOINT_FLAG_PUNCTUATION, # Dash Punctuation + "Pe": CODEPOINT_FLAG_PUNCTUATION, # Close Punctuation + "Pf": CODEPOINT_FLAG_PUNCTUATION, # Final Punctuation + "Pi": CODEPOINT_FLAG_PUNCTUATION, # Initial Punctuation + "Po": CODEPOINT_FLAG_PUNCTUATION, # Other Punctuation + "Ps": CODEPOINT_FLAG_PUNCTUATION, # Open Punctuation + "Sc": CODEPOINT_FLAG_SYMBOL, # Currency Symbol + "Sk": CODEPOINT_FLAG_SYMBOL, # Modifier Symbol + "Sm": CODEPOINT_FLAG_SYMBOL, # Math Symbol + "So": CODEPOINT_FLAG_SYMBOL, # Other Symbol + "Zl": CODEPOINT_FLAG_SEPARATOR, # Line Separator + "Zp": CODEPOINT_FLAG_SEPARATOR, # Paragraph Separator + "Zs": CODEPOINT_FLAG_SEPARATOR, # Space Separator +} + + +codepoint_flags = array.array('H', [CODEPOINT_FLAG_UNDEFINED]) * MAX_CODEPOINTS +table_whitespace = [] +table_lowercase = [] +table_uppercase = [] +table_nfd = [] + +for (cpt, cpt_lower, cpt_upper, categ, bidir) in unicode_data_iter(): + # convert codepoint to unicode character + char = chr(cpt) + + # codepoint category flags + codepoint_flags[cpt] = UNICODE_CATEGORY_TO_FLAG[categ] + + # lowercase conversion + if cpt_lower: + table_lowercase.append((cpt, cpt_lower)) + + # uppercase conversion + if cpt_upper: + table_uppercase.append((cpt, cpt_upper)) + + # NFD normalization + norm = ord(unicodedata.normalize('NFD', char)[0]) + if cpt != norm: + table_nfd.append((cpt, norm)) + + +# whitespaces, see "<White_Space>" https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt +table_whitespace.extend(range(0x0009, 0x000D + 1)) +table_whitespace.extend(range(0x2000, 0x200A + 1)) +table_whitespace.extend([0x0020, 0x0085, 0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000]) + + +# sort by codepoint +table_whitespace.sort() +table_lowercase.sort() +table_uppercase.sort() +table_nfd.sort() + + +# group ranges with same flags +ranges_flags: list[tuple[int, int]] = [(0, codepoint_flags[0])] # start, flags +for codepoint, flags in enumerate(codepoint_flags): + if flags != ranges_flags[-1][1]: + ranges_flags.append((codepoint, flags)) +ranges_flags.append((MAX_CODEPOINTS, 0x0000)) + + +# group ranges with same nfd +ranges_nfd: list[tuple[int, int, int]] = [(0, 0, 0)] # start, last, nfd +for codepoint, norm in table_nfd: + start = ranges_nfd[-1][0] + if ranges_nfd[-1] != (start, codepoint - 1, norm): + ranges_nfd.append(None) # type: ignore[arg-type] # dummy, will be replaced below + start = codepoint + ranges_nfd[-1] = (start, codepoint, norm) + + +# Generate 'unicode-data.cpp': +# python ./scripts//gen-unicode-data.py > unicode-data.cpp + +def out(line=""): + print(line, end='\n') # noqa + + +out("""\ +// generated with scripts/gen-unicode-data.py + +#include "unicode-data.h" + +#include <cstdint> +#include <vector> +#include <unordered_map> +#include <unordered_set> +""") + +out("const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1") +for codepoint, flags in ranges_flags: + out("{0x%06X, 0x%04X}," % (codepoint, flags)) +out("};\n") + +out("const std::unordered_set<uint32_t> unicode_set_whitespace = {") +for codepoint in table_whitespace: + out("0x%06X," % codepoint) +out("};\n") + +out("const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {") +for tuple_lw in table_lowercase: + out("{0x%06X, 0x%06X}," % tuple_lw) +out("};\n") + +out("const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {") +for tuple_up in table_uppercase: + out("{0x%06X, 0x%06X}," % tuple_up) +out("};\n") + 
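+# For reference (illustrative, assuming the current UnicodeData.txt still lists
+# U+0000 as category Cc): the first entry emitted into unicode_ranges_flags
+# above is {0x000000, 0x0080}, i.e. the run starting at codepoint 0 carries
+# CODEPOINT_FLAG_CONTROL.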
+
+out("const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd")
+for triple in ranges_nfd:
+    out("{0x%06X, 0x%06X, 0x%06X}," % triple)
+out("};\n")
diff --git a/llama.cpp/scripts/get-flags.mk b/llama.cpp/scripts/get-flags.mk
new file mode 100644
index 0000000..a742766
--- /dev/null
+++ b/llama.cpp/scripts/get-flags.mk
@@ -0,0 +1,38 @@
+ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
+    GF_CC_IS_GCC = 1
+    GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null; echo; $(GF_CC) -dumpversion; } | awk -F. '/./ { printf("%02d%02d%02d", $$1, $$2, $$3); exit }')
+else
+    GF_CC_IS_CLANG = 1
+    ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
+        GF_CC_IS_LLVM_CLANG = 1
+    else
+        GF_CC_IS_APPLE_CLANG = 1
+    endif
+    GF_CC_VER := \
+        $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+            | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+endif
+
+ifeq ($(GF_CC_IS_CLANG), 1)
+    # clang options
+    GF_CFLAGS   = -Wunreachable-code-break -Wunreachable-code-return
+    GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+    ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
+        GF_CFLAGS += -Wdouble-promotion
+    endif
+    ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
+        GF_CFLAGS += -Wdouble-promotion
+    endif
+else
+    # gcc options
+    GF_CFLAGS   = -Wdouble-promotion
+    GF_CXXFLAGS = -Wno-array-bounds
+
+    ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
+        GF_CXXFLAGS += -Wno-format-truncation
+    endif
+    ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
+        GF_CXXFLAGS += -Wextra-semi
+    endif
+endif
diff --git a/llama.cpp/scripts/get-hellaswag.sh b/llama.cpp/scripts/get-hellaswag.sh
new file mode 100755
index 0000000..484e56f
--- /dev/null
+++ b/llama.cpp/scripts/get-hellaswag.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+wget https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/hellaswag_val_full.txt
+
+echo "Usage:"
+echo ""
+echo "  ./llama-perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
+echo ""
+
+exit 0
diff --git a/llama.cpp/scripts/get-pg.sh b/llama.cpp/scripts/get-pg.sh
new file mode 100755
index 0000000..f180bf8
--- /dev/null
+++ b/llama.cpp/scripts/get-pg.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+function usage {
+    echo "usage: $0 <n>"
+    echo "note: n is the number of essays to download"
+    echo "for a given n, the resulting pg.txt file will have the following number of tokens:"
+    echo "n   | tokens"
+    echo "--- | ---"
+    echo "1   | 6230"
+    echo "2   | 23619"
+    echo "5   | 25859"
+    echo "10  | 36888"
+    echo "15  | 50188"
+    echo "20  | 59094"
+    echo "25  | 88764"
+    echo "30  | 103121"
+    echo "32  | 108338"
+    echo "35  | 113403"
+    echo "40  | 127699"
+    echo "45  | 135896"
+    exit 1
+}
+
+function has_cmd {
+    if ! 
[ -x "$(command -v $1)" ]; then + echo "error: $1 is not available" >&2 + exit 1 + fi +} + +# check for: curl, html2text, tail, sed, fmt +has_cmd curl +has_cmd html2text +has_cmd tail +has_cmd sed + +if [ $# -ne 1 ]; then + usage +fi + +n=$1 + +# get urls +urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n $n)" + +printf "urls:\n%s\n" "$urls" + +if [ -f pg.txt ]; then + rm pg.txt +fi + +c=1 +for url in $urls; do + echo "processing $url" + + cc=$(printf "%03d" $c) + + curl -L $url | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 >> pg-$cc-one.txt + cat pg-$cc-one.txt >> pg.txt + + cp -v pg.txt pg-$cc-all.txt + c=$((c+1)) + + # don't flood the server + sleep 1 +done + +echo "done. data in pg.txt" + +exit 0 diff --git a/llama.cpp/scripts/get-wikitext-103.sh b/llama.cpp/scripts/get-wikitext-103.sh new file mode 100755 index 0000000..244a371 --- /dev/null +++ b/llama.cpp/scripts/get-wikitext-103.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip + +echo "Usage:" +echo "" +echo " ./llama-perplexity -m model.gguf -f wiki.test.raw [other params]" +echo "" + +exit 0 diff --git a/llama.cpp/scripts/get-wikitext-2.sh b/llama.cpp/scripts/get-wikitext-2.sh new file mode 100755 index 0000000..67b0b01 --- /dev/null +++ b/llama.cpp/scripts/get-wikitext-2.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip +unzip wikitext-2-raw-v1.zip + +echo "Usage:" +echo "" +echo " ./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]" +echo "" + +exit 0 diff --git a/llama.cpp/scripts/get-winogrande.sh b/llama.cpp/scripts/get-winogrande.sh new file mode 100755 index 0000000..2b48b11 --- /dev/null +++ b/llama.cpp/scripts/get-winogrande.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/winogrande-debiased-eval.csv + +echo "Usage:" +echo "" +echo " ./llama-perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]" +echo "" + +exit 0 diff --git a/llama.cpp/scripts/get_chat_template.py b/llama.cpp/scripts/get_chat_template.py new file mode 100755 index 0000000..b4827b3 --- /dev/null +++ b/llama.cpp/scripts/get_chat_template.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +''' + Fetches the Jinja chat template of a HuggingFace model. + If a model has multiple chat templates, you can specify the variant name. + + Syntax: + ./scripts/get_chat_template.py model_id [variant] + + Examples: + ./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use + ./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct +''' + +import json +import re +import sys + + +def get_chat_template(model_id, variant=None): + try: + # Use huggingface_hub library if available. + # Allows access to gated models if the user has access and ran `huggingface-cli login`. 
+        from huggingface_hub import hf_hub_download
+        with open(hf_hub_download(repo_id=model_id, filename="tokenizer_config.json"), encoding="utf-8") as f:
+            config_str = f.read()
+    except ImportError:
+        import requests
+        assert re.match(r"^[\w.-]+/[\w.-]+$", model_id), f"Invalid model ID: {model_id}"
+        response = requests.get(f"https://huggingface.co/{model_id}/resolve/main/tokenizer_config.json")
+        if response.status_code == 401:
+            raise Exception('Access to this model is gated, please request access, authenticate with `huggingface-cli login` and make sure to run `pip install huggingface_hub`')
+        response.raise_for_status()
+        config_str = response.text
+
+    try:
+        config = json.loads(config_str)
+    except json.JSONDecodeError:
+        # Fix https://huggingface.co/NousResearch/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
+        # (Remove extra '}' near the end of the file)
+        config = json.loads(re.sub(r'\}([\n\s]*\}[\n\s]*\],[\n\s]*"clean_up_tokenization_spaces")', r'\1', config_str))
+
+    chat_template = config['chat_template']
+    if isinstance(chat_template, str):
+        return chat_template
+    else:
+        variants = {
+            ct['name']: ct['template']
+            for ct in chat_template
+        }
+
+        def format_variants():
+            return ', '.join(f'"{v}"' for v in variants.keys())
+
+        if variant is None:
+            if 'default' not in variants:
+                raise Exception(f'Please specify a chat template variant (one of {format_variants()})')
+            variant = 'default'
+            sys.stderr.write(f'Note: picked "default" chat template variant (out of {format_variants()})\n')
+        elif variant not in variants:
+            raise Exception(f"Variant {variant} not found in chat template (found {format_variants()})")
+
+        return variants[variant]
+
+
+def main(args):
+    if len(args) < 1:
+        raise ValueError("Please provide a model ID and an optional variant name")
+    model_id = args[0]
+    variant = None if len(args) < 2 else args[1]
+
+    template = get_chat_template(model_id, variant)
+    sys.stdout.write(template)
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/llama.cpp/scripts/hf.sh b/llama.cpp/scripts/hf.sh
new file mode 100755
index 0000000..e41b905
--- /dev/null
+++ b/llama.cpp/scripts/hf.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+#
+# Shortcut for downloading HF models
+#
+# Usage:
+#   ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+#   ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+#   ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
+#
+
+# all logs go to stderr
+function log {
+    echo "$@" 1>&2
+}
+
+function usage {
+    log "Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [--outdir <dir>] [-h|--help]"
+    exit 1
+}
+
+# check for curl or wget
+function has_cmd {
+    if ! [ -x "$(command -v $1)" ]; then
+        return 1
+    fi
+}
+
+if has_cmd wget; then
+    cmd="wget -q -c -O %s/%s %s"
+elif has_cmd curl; then
+    cmd="curl -C - -f --output-dir %s -o %s -L %s"
+else
+    log "[E] curl or wget not found"
+    exit 1
+fi
+
+url=""
+repo=""
+file=""
+outdir="."
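+
+# note: $cmd above is a printf template; its three %s slots are filled in below
+# with the output directory, the file name, and the download URL, respectively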
+ +# parse args +while [[ $# -gt 0 ]]; do + case "$1" in + --url) + url="$2" + shift 2 + ;; + --repo) + repo="$2" + shift 2 + ;; + --file) + file="$2" + shift 2 + ;; + --outdir) + outdir="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + url="$1" + shift + ;; + esac +done + +if [ -n "$repo" ] && [ -n "$file" ]; then + url="https://huggingface.co/$repo/resolve/main/$file" +fi + +if [ -z "$url" ]; then + log "[E] missing --url" + usage +fi + +# check if the URL is a HuggingFace model, and if so, try to download it +is_url=false + +if [[ ${#url} -gt 22 ]]; then + if [[ ${url:0:22} == "https://huggingface.co" ]]; then + is_url=true + fi +fi + +if [ "$is_url" = false ]; then + log "[E] invalid URL, must start with https://huggingface.co" + exit 0 +fi + +# replace "blob/main" with "resolve/main" +url=${url/blob\/main/resolve\/main} + +basename=$(basename $url) + +log "[+] attempting to download $basename" + +if [ -n "$cmd" ]; then + cmd=$(printf "$cmd" "$outdir" "$basename" "$url") + log "[+] $cmd" + if $cmd; then + echo $outdir/$basename + exit 0 + fi +fi + +log "[-] failed to download" + +exit 1 diff --git a/llama.cpp/scripts/install-oneapi.bat b/llama.cpp/scripts/install-oneapi.bat new file mode 100644 index 0000000..e99bef1 --- /dev/null +++ b/llama.cpp/scripts/install-oneapi.bat @@ -0,0 +1,19 @@ +:: MIT license +:: Copyright (C) 2024 Intel Corporation +:: SPDX-License-Identifier: MIT + + +set URL=%1 +set COMPONENTS=%2 + +curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5 +start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log +del %TEMP%\webimage.exe +if "%COMPONENTS%"=="" ( + webimage_extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. +) else ( + webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. 
+) +set installer_exit_code=%ERRORLEVEL% +rd /s/q "webimage_extracted" +exit /b %installer_exit_code% diff --git a/llama.cpp/scripts/jinja/jinja-tester.py b/llama.cpp/scripts/jinja/jinja-tester.py new file mode 100755 index 0000000..a489305 --- /dev/null +++ b/llama.cpp/scripts/jinja/jinja-tester.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python3 +import sys +import json +import argparse +import jinja2.ext as jinja2_ext +from PySide6.QtWidgets import ( + QApplication, + QMainWindow, + QWidget, + QVBoxLayout, + QHBoxLayout, + QLabel, + QPlainTextEdit, + QTextEdit, + QPushButton, + QFileDialog, +) +from PySide6.QtGui import QColor, QColorConstants, QTextCursor, QTextFormat +from PySide6.QtCore import Qt, QRect, QSize +from jinja2 import TemplateSyntaxError +from jinja2.sandbox import ImmutableSandboxedEnvironment +from datetime import datetime + + +def format_template_content(template_content): + """Format the Jinja template content using Jinja2's lexer.""" + if not template_content.strip(): + return template_content + + env = ImmutableSandboxedEnvironment() + tc_rstrip = template_content.rstrip() + tokens = list(env.lex(tc_rstrip)) + result = "" + indent_level = 0 + i = 0 + + while i < len(tokens): + token = tokens[i] + _, token_type, token_value = token + + if token_type == "block_begin": + block_start = i + # Collect all tokens for this block construct + construct_content = token_value + end_token_type = token_type.replace("_begin", "_end") + j = i + 1 + while j < len(tokens) and tokens[j][1] != end_token_type: + construct_content += tokens[j][2] + j += 1 + + if j < len(tokens): # Found the end token + construct_content += tokens[j][2] + i = j # Skip to the end token + + # Check for control structure keywords for indentation + stripped_content = construct_content.strip() + instr = block_start + 1 + while tokens[instr][1] == "whitespace": + instr = instr + 1 + + instruction_token = tokens[instr][2] + start_control_tokens = ["if", "for", "macro", "call", "block"] + end_control_tokens = ["end" + t for t in start_control_tokens] + is_control_start = any( + instruction_token.startswith(kw) for kw in start_control_tokens + ) + is_control_end = any( + instruction_token.startswith(kw) for kw in end_control_tokens + ) + + # Adjust indentation for control structures + # For control end blocks, decrease indent BEFORE adding the content + if is_control_end: + indent_level = max(0, indent_level - 1) + + # Remove all previous whitespace before this block + result = result.rstrip() + + # Add proper indent, but only if this is not the first token + added_newline = False + if result: # Only add newline and indent if there's already content + result += ( + "\n" + " " * indent_level + ) # Use 2 spaces per indent level + added_newline = True + else: # For the first token, don't add any indent + result += "" + + # Add the block content + result += stripped_content + + # Add '-' after '%' if it wasn't there and we added a newline or indent + if ( + added_newline + and stripped_content.startswith("{%") + and not stripped_content.startswith("{%-") + ): + # Add '-' at the beginning + result = ( + result[: result.rfind("{%")] + + "{%-" + + result[result.rfind("{%") + 2 :] + ) + if stripped_content.endswith("%}") and not stripped_content.endswith( + "-%}" + ): + # Only add '-' if this is not the last token or if there's content after + if i + 1 < len(tokens) and tokens[i + 1][1] != "eof": + result = result[:-2] + "-%}" + + # For control start blocks, increase indent AFTER adding the content + if is_control_start: + 
indent_level += 1 + else: + # Malformed template, just add the token + result += token_value + elif token_type == "variable_begin": + # Collect all tokens for this variable construct + construct_content = token_value + end_token_type = token_type.replace("_begin", "_end") + j = i + 1 + while j < len(tokens) and tokens[j][1] != end_token_type: + construct_content += tokens[j][2] + j += 1 + + if j < len(tokens): # Found the end token + construct_content += tokens[j][2] + i = j # Skip to the end token + + # For variable constructs, leave them alone + # Do not add indent or whitespace before or after them + result += construct_content + else: + # Malformed template, just add the token + result += token_value + elif token_type == "data": + # Handle data (text between Jinja constructs) + # For data content, preserve it as is + result += token_value + else: + # Handle any other tokens + result += token_value + + i += 1 + + # Clean up trailing newlines and spaces + result = result.rstrip() + + # Copy the newline / space count from the original + if (trailing_length := len(template_content) - len(tc_rstrip)): + result += template_content[-trailing_length:] + + return result + + +# ------------------------ +# Line Number Widget +# ------------------------ +class LineNumberArea(QWidget): + def __init__(self, editor): + super().__init__(editor) + self.code_editor = editor + + def sizeHint(self): + return QSize(self.code_editor.line_number_area_width(), 0) + + def paintEvent(self, event): + self.code_editor.line_number_area_paint_event(event) + + +class CodeEditor(QPlainTextEdit): + def __init__(self): + super().__init__() + self.line_number_area = LineNumberArea(self) + + self.blockCountChanged.connect(self.update_line_number_area_width) + self.updateRequest.connect(self.update_line_number_area) + self.cursorPositionChanged.connect(self.highlight_current_line) + + self.update_line_number_area_width(0) + self.highlight_current_line() + + def line_number_area_width(self): + digits = len(str(self.blockCount())) + space = 3 + self.fontMetrics().horizontalAdvance("9") * digits + return space + + def update_line_number_area_width(self, _): + self.setViewportMargins(self.line_number_area_width(), 0, 0, 0) + + def update_line_number_area(self, rect, dy): + if dy: + self.line_number_area.scroll(0, dy) + else: + self.line_number_area.update( + 0, rect.y(), self.line_number_area.width(), rect.height() + ) + + if rect.contains(self.viewport().rect()): + self.update_line_number_area_width(0) + + def resizeEvent(self, event): + super().resizeEvent(event) + cr = self.contentsRect() + self.line_number_area.setGeometry( + QRect(cr.left(), cr.top(), self.line_number_area_width(), cr.height()) + ) + + def line_number_area_paint_event(self, event): + from PySide6.QtGui import QPainter + + painter = QPainter(self.line_number_area) + painter.fillRect(event.rect(), QColorConstants.LightGray) + + block = self.firstVisibleBlock() + block_number = block.blockNumber() + top = int( + self.blockBoundingGeometry(block).translated(self.contentOffset()).top() + ) + bottom = top + int(self.blockBoundingRect(block).height()) + + while block.isValid() and top <= event.rect().bottom(): + if block.isVisible() and bottom >= event.rect().top(): + number = str(block_number + 1) + painter.setPen(QColorConstants.Black) + painter.drawText( + 0, + top, + self.line_number_area.width() - 2, + self.fontMetrics().height(), + Qt.AlignmentFlag.AlignRight, + number, + ) + block = block.next() + top = bottom + bottom = top + 
int(self.blockBoundingRect(block).height()) + block_number += 1 + + def highlight_current_line(self): + extra_selections = [] + if not self.isReadOnly(): + selection = QTextEdit.ExtraSelection() + line_color = QColorConstants.Yellow.lighter(160) + selection.format.setBackground(line_color) # pyright: ignore[reportAttributeAccessIssue] + selection.format.setProperty(QTextFormat.Property.FullWidthSelection, True) # pyright: ignore[reportAttributeAccessIssue] + selection.cursor = self.textCursor() # pyright: ignore[reportAttributeAccessIssue] + selection.cursor.clearSelection() # pyright: ignore[reportAttributeAccessIssue] + extra_selections.append(selection) + self.setExtraSelections(extra_selections) + + def highlight_position(self, lineno: int, col: int, color: QColor): + block = self.document().findBlockByLineNumber(lineno - 1) + if block.isValid(): + cursor = QTextCursor(block) + text = block.text() + start = block.position() + max(0, col - 1) + cursor.setPosition(start) + if col <= len(text): + cursor.movePosition( + QTextCursor.MoveOperation.NextCharacter, + QTextCursor.MoveMode.KeepAnchor, + ) + + extra = QTextEdit.ExtraSelection() + extra.format.setBackground(color.lighter(160)) # pyright: ignore[reportAttributeAccessIssue] + extra.cursor = cursor # pyright: ignore[reportAttributeAccessIssue] + + self.setExtraSelections(self.extraSelections() + [extra]) + + def highlight_line(self, lineno: int, color: QColor): + block = self.document().findBlockByLineNumber(lineno - 1) + if block.isValid(): + cursor = QTextCursor(block) + cursor.select(QTextCursor.SelectionType.LineUnderCursor) + + extra = QTextEdit.ExtraSelection() + extra.format.setBackground(color.lighter(160)) # pyright: ignore[reportAttributeAccessIssue] + extra.cursor = cursor # pyright: ignore[reportAttributeAccessIssue] + + self.setExtraSelections(self.extraSelections() + [extra]) + + def clear_highlighting(self): + self.highlight_current_line() + + +# ------------------------ +# Main App +# ------------------------ +class JinjaTester(QMainWindow): + def __init__(self): + super().__init__() + self.setWindowTitle("Jinja Template Tester") + self.resize(1200, 800) + + central = QWidget() + main_layout = QVBoxLayout(central) + + # -------- Top input area -------- + input_layout = QHBoxLayout() + + # Template editor with label + template_layout = QVBoxLayout() + template_label = QLabel("Jinja2 Template") + template_layout.addWidget(template_label) + self.template_edit = CodeEditor() + template_layout.addWidget(self.template_edit) + input_layout.addLayout(template_layout) + + # JSON editor with label + json_layout = QVBoxLayout() + json_label = QLabel("Context (JSON)") + json_layout.addWidget(json_label) + self.json_edit = CodeEditor() + self.json_edit.setPlainText(""" +{ + "add_generation_prompt": true, + "bos_token": "", + "eos_token": "", + "messages": [ + { + "role": "user", + "content": "What is the capital of Poland?" 
+ } + ] +} + """.strip()) + json_layout.addWidget(self.json_edit) + input_layout.addLayout(json_layout) + + main_layout.addLayout(input_layout) + + # -------- Rendered output area -------- + output_label = QLabel("Rendered Output") + main_layout.addWidget(output_label) + self.output_edit = QPlainTextEdit() + self.output_edit.setReadOnly(True) + main_layout.addWidget(self.output_edit) + + # -------- Render button and status -------- + btn_layout = QHBoxLayout() + + # Load template button + self.load_btn = QPushButton("Load Template") + self.load_btn.clicked.connect(self.load_template) + btn_layout.addWidget(self.load_btn) + + # Format template button + self.format_btn = QPushButton("Format") + self.format_btn.clicked.connect(self.format_template) + btn_layout.addWidget(self.format_btn) + + self.render_btn = QPushButton("Render") + self.render_btn.clicked.connect(self.render_template) + btn_layout.addWidget(self.render_btn) + main_layout.addLayout(btn_layout) + + # Status label below buttons + self.status_label = QLabel("Ready") + main_layout.addWidget(self.status_label) + + self.setCentralWidget(central) + + def render_template(self): + self.template_edit.clear_highlighting() + self.output_edit.clear() + + template_str = self.template_edit.toPlainText() + json_str = self.json_edit.toPlainText() + + # Parse JSON context + try: + context = json.loads(json_str) if json_str.strip() else {} + except Exception as e: + self.status_label.setText(f"❌ JSON Error: {e}") + return + + def raise_exception(text: str) -> str: + raise RuntimeError(text) + + env = ImmutableSandboxedEnvironment( + trim_blocks=True, + lstrip_blocks=True, + extensions=[jinja2_ext.loopcontrols], + ) + env.filters["tojson"] = ( + lambda x, + indent=None, + separators=None, + sort_keys=False, + ensure_ascii=False: json.dumps( + x, + indent=indent, + separators=separators, + sort_keys=sort_keys, + ensure_ascii=ensure_ascii, + ) + ) + env.globals["strftime_now"] = lambda format: datetime.now().strftime(format) + env.globals["raise_exception"] = raise_exception + try: + template = env.from_string(template_str) + output = template.render(context) + self.output_edit.setPlainText(output) + self.status_label.setText("✅ Render successful") + except TemplateSyntaxError as e: + self.status_label.setText(f"❌ Syntax Error (line {e.lineno}): {e.message}") + if e.lineno: + self.template_edit.highlight_line(e.lineno, QColor("red")) + except Exception as e: + # Catch all runtime errors + # Try to extract template line number + lineno = None + tb = e.__traceback__ + while tb: + frame = tb.tb_frame + if frame.f_code.co_filename == "<template>": + lineno = tb.tb_lineno + break + tb = tb.tb_next + + error_msg = f"Runtime Error: {type(e).__name__}: {e}" + if lineno: + error_msg = f"Runtime Error at line {lineno} in template: {type(e).__name__}: {e}" + self.template_edit.highlight_line(lineno, QColor("orange")) + + self.output_edit.setPlainText(error_msg) + self.status_label.setText(f"❌ {error_msg}") + + def load_template(self): + """Load a Jinja template from a file using a file dialog.""" + file_path, _ = QFileDialog.getOpenFileName( + self, + "Load Jinja Template", + "", + "Template Files (*.jinja *.j2 *.html *.txt);;All Files (*)", + ) + + if file_path: + try: + with open(file_path, "r", encoding="utf-8") as file: + content = file.read() + self.template_edit.setPlainText(content) + self.status_label.setText(f"✅ Loaded template from {file_path}") + except Exception as e: + self.status_label.setText(f"❌ Error loading file: {str(e)}") + + def 
format_template(self):
+        """Format the Jinja template using Jinja2's lexer for proper parsing."""
+        try:
+            template_content = self.template_edit.toPlainText()
+            if not template_content.strip():
+                self.status_label.setText("⚠️ Template is empty")
+                return
+
+            formatted_content = format_template_content(template_content)
+            self.template_edit.setPlainText(formatted_content)
+            self.status_label.setText("✅ Template formatted")
+        except Exception as e:
+            self.status_label.setText(f"❌ Error formatting template: {str(e)}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        # CLI mode
+        parser = argparse.ArgumentParser(description="Jinja Template Tester")
+        parser.add_argument(
+            "--template", required=True, help="Path to Jinja template file"
+        )
+        parser.add_argument("--context", required=True, help="JSON string for context")
+        parser.add_argument(
+            "--action",
+            choices=["format", "render"],
+            default="render",
+            help="Action to perform",
+        )
+        args = parser.parse_args()
+
+        # Load template
+        with open(args.template, "r", encoding="utf-8") as f:
+            template_content = f.read()
+
+        # Load JSON
+        context = json.loads(args.context)
+        # Add missing variables
+        context.setdefault("bos_token", "")
+        context.setdefault("eos_token", "")
+        context.setdefault("add_generation_prompt", False)
+
+        env = ImmutableSandboxedEnvironment()
+
+        if args.action == "format":
+            formatted = format_template_content(template_content)
+            print(formatted)  # noqa: NP100
+        elif args.action == "render":
+            template = env.from_string(template_content)
+            output = template.render(context)
+            print(output)  # noqa: NP100
+
+    else:
+        # GUI mode
+        app = QApplication(sys.argv)
+        window = JinjaTester()
+        window.show()
+        sys.exit(app.exec())
diff --git a/llama.cpp/scripts/jinja/requirements.txt b/llama.cpp/scripts/jinja/requirements.txt
new file mode 100644
index 0000000..253685b
--- /dev/null
+++ b/llama.cpp/scripts/jinja/requirements.txt
@@ -0,0 +1,2 @@
+PySide6
+jinja2
diff --git a/llama.cpp/scripts/pr2wt.sh b/llama.cpp/scripts/pr2wt.sh
new file mode 100755
index 0000000..bd635f3
--- /dev/null
+++ b/llama.cpp/scripts/pr2wt.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+
+# initialize a new worktree from a PR number:
+#
+# - creates a new remote using the fork's clone URL
+# - creates a local branch tracking the remote branch
+# - creates a new worktree in a parent folder, suffixed with "-pr-$PR"
+#
+# sample usage:
+#   ./scripts/pr2wt.sh 12345
+#   ./scripts/pr2wt.sh 12345 opencode
+#   ./scripts/pr2wt.sh 12345 "cmake -B build && cmake --build build"
+#   ./scripts/pr2wt.sh 12345 "bash -l"
+
+function usage() {
+    echo "usage: $0 <pr_number> [cmd]"
+    exit 1
+}
+
+# check we are in the right directory
+if [[ ! 
-f "scripts/pr2wt.sh" ]]; then + echo "error: this script must be run from the root of the repository" + exit 1 +fi + +if [[ $# -lt 1 || $# -gt 2 ]]; then + usage +fi + +PR=$1 +[[ "$PR" =~ ^[0-9]+$ ]] || { echo "error: PR number must be numeric"; exit 1; } + +url_origin=$(git config --get remote.origin.url) || { + echo "error: no remote named 'origin' in this repository" + exit 1 +} + +org_repo=$(echo $url_origin | cut -d/ -f4-) +org_repo=${org_repo%.git} + +echo "org/repo: $org_repo" + +meta=$(curl -sSLf -H "Accept: application/vnd.github+json" "https://api.github.com/repos/$org_repo/pulls/$PR") + +url_remote=$(echo "$meta" | jq -r '.head.repo.clone_url') +head_ref=$(echo "$meta" | jq -r '.head.ref') + +echo "url: $url_remote" +echo "head_ref: $head_ref" + +url_remote_cur=$(git config --get "remote.pr/$PR.url" 2>/dev/null || true) + +if [[ "$url_remote_cur" != "$url_remote" ]]; then + git remote rm pr/$PR 2> /dev/null + git remote add pr/$PR "$url_remote" +fi + +git fetch "pr/$PR" "$head_ref" + +dir=$(basename $(pwd)) + +git branch -D pr/$PR 2> /dev/null +git worktree add -b pr/$PR ../$dir-pr-$PR pr/$PR/$head_ref 2> /dev/null + +wt_path=$(cd ../$dir-pr-$PR && pwd) + +echo "git worktree created in $wt_path" + +cd $wt_path +git branch --set-upstream-to=pr/$PR/$head_ref +git pull --ff-only || { + echo "error: failed to pull pr/$PR" + exit 1 +} + +if [[ $# -eq 2 ]]; then + echo "executing: $2" + eval "$2" +fi diff --git a/llama.cpp/scripts/serve-static.js b/llama.cpp/scripts/serve-static.js new file mode 100644 index 0000000..df4953e --- /dev/null +++ b/llama.cpp/scripts/serve-static.js @@ -0,0 +1,110 @@ +const http = require('http'); +const fs = require('fs').promises; +const path = require('path'); + +// This file is used for testing wasm build from emscripten +// Example build command: +// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_OPENSSL=OFF +// cmake --build build-wasm --target test-backend-ops -j + +const PORT = 8080; +const STATIC_DIR = path.join(__dirname, '../build-wasm/bin'); +console.log(`Serving static files from: ${STATIC_DIR}`); + +const mimeTypes = { + '.html': 'text/html', + '.js': 'text/javascript', + '.css': 'text/css', + '.png': 'image/png', + '.jpg': 'image/jpeg', + '.gif': 'image/gif', + '.svg': 'image/svg+xml', + '.json': 'application/json', + '.woff': 'font/woff', + '.woff2': 'font/woff2', +}; + +async function generateDirListing(dirPath, reqUrl) { + const files = await fs.readdir(dirPath); + let html = ` + <!DOCTYPE html> + <html> + <head> + <title>Directory Listing</title> + <style> + body { font-family: Arial, sans-serif; padding: 20px; } + ul { list-style: none; padding: 0; } + li { margin: 5px 0; } + a { text-decoration: none; color: #0066cc; } + a:hover { text-decoration: underline; } + </style> + </head> + <body> + <h1>Directory: ${reqUrl}</h1> + <ul> + `; + + if (reqUrl !== '/') { + html += `<li><a href="../">../ (Parent Directory)</a></li>`; + } + + for (const file of files) { + const filePath = path.join(dirPath, file); + const stats = await fs.stat(filePath); + const link = encodeURIComponent(file) + (stats.isDirectory() ? '/' : ''); + html += `<li><a href="${link}">${file}${stats.isDirectory() ? 
'/' : ''}</a></li>`; + } + + html += ` + </ul> + </body> + </html> + `; + return html; +} + +const server = http.createServer(async (req, res) => { + try { + // Set COOP and COEP headers + res.setHeader('Cross-Origin-Opener-Policy', 'same-origin'); + res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp'); + res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate'); + res.setHeader('Pragma', 'no-cache'); + res.setHeader('Expires', '0'); + + const filePath = path.join(STATIC_DIR, decodeURIComponent(req.url)); + const stats = await fs.stat(filePath); + + if (stats.isDirectory()) { + const indexPath = path.join(filePath, 'index.html'); + try { + const indexData = await fs.readFile(indexPath); + res.writeHeader(200, { 'Content-Type': 'text/html' }); + res.end(indexData); + } catch { + // No index.html, generate directory listing + const dirListing = await generateDirListing(filePath, req.url); + res.writeHeader(200, { 'Content-Type': 'text/html' }); + res.end(dirListing); + } + } else { + const ext = path.extname(filePath).toLowerCase(); + const contentType = mimeTypes[ext] || 'application/octet-stream'; + const data = await fs.readFile(filePath); + res.writeHeader(200, { 'Content-Type': contentType }); + res.end(data); + } + } catch (err) { + if (err.code === 'ENOENT') { + res.writeHeader(404, { 'Content-Type': 'text/plain' }); + res.end('404 Not Found'); + } else { + res.writeHeader(500, { 'Content-Type': 'text/plain' }); + res.end('500 Internal Server Error'); + } + } +}); + +server.listen(PORT, () => { + console.log(`Server running at http://localhost:${PORT}/`); +}); diff --git a/llama.cpp/scripts/server-bench.py b/llama.cpp/scripts/server-bench.py new file mode 100755 index 0000000..dbbb093 --- /dev/null +++ b/llama.cpp/scripts/server-bench.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import random +import sqlite3 +import subprocess +from time import sleep, time +from typing import Optional, Union + +import datasets +import logging +import matplotlib.pyplot as plt +import numpy as np +import requests +from tqdm.contrib.concurrent import thread_map + + +logging.basicConfig(level=logging.INFO, format='%(message)s') +logger = logging.getLogger("server-bench") + + +def get_prompts_text(dataset_name: str, n_prompts: int) -> Optional[list[str]]: + ret = [] + if dataset_name.lower() == "mmlu": + logger.info("Loading MMLU dataset...") + ret = datasets.load_dataset("cais/mmlu", "all")["test"]["question"] # type: ignore + else: + return None + if n_prompts >= 0: + ret = ret[:n_prompts] + return ret + + +def get_prompt_lengths_rng(n_prompts: int, prompt_length_min: int, prompt_length_max: int, seed_offset: int) -> list[int]: + assert n_prompts >= 0 + ret: list[int] = [] + for i in range(n_prompts): + if seed_offset >= 0: + random.seed(3 * (seed_offset + 1000 * i) + 0) + ret.append(random.randint(prompt_length_min, prompt_length_max)) + return ret + + +def get_prompts_rng(prompt_lengths: list[int]) -> list[list[int]]: + return [[random.randint(100, 10000) for _ in range(pl)] for pl in prompt_lengths] + + +def get_server(path_server: str, path_log: Optional[str]) -> dict: + if path_server.startswith("http://") or path_server.startswith("https://"): + return {"process": None, "address": path_server, "fout": None} + if os.environ.get("LLAMA_ARG_HOST") is None: + logger.info("LLAMA_ARG_HOST not explicitly set, using 127.0.0.1") + os.environ["LLAMA_ARG_HOST"] = "127.0.0.1" + if os.environ.get("LLAMA_ARG_PORT") is None: + 
logger.info("LLAMA_ARG_PORT not explicitly set, using 8080") + os.environ["LLAMA_ARG_PORT"] = "8080" + hostname: Optional[str] = os.environ.get("LLAMA_ARG_HOST") + port: Optional[str] = os.environ.get("LLAMA_ARG_PORT") + assert hostname is not None + assert port is not None + address: str = f"http://{hostname}:{port}" + logger.info(f"Starting the llama.cpp server under {address}...") + + fout = open(path_log.format(port=port), "w") if path_log is not None else subprocess.DEVNULL + process = subprocess.Popen([path_server], stdout=fout, stderr=subprocess.STDOUT) + + n_failures: int = 0 + while True: + try: + sleep(1.0) + exit_code = process.poll() + if exit_code is not None: + raise RuntimeError(f"llama.cpp server exited unexpectedly with exit code {exit_code}{path_log and f', see {path_log.format(port=port)}' or ''}") + response = requests.get(f"{address}/health") + if response.status_code == 200: + break + except requests.ConnectionError: + n_failures += 1 + if n_failures >= 10: + raise RuntimeError("llama.cpp server is not healthy after 10 seconds") + + return {"process": process, "address": address, "fout": fout} + + +def get_prompt_length(data: dict) -> int: + session = data["session"] + server_address: str = data["server_address"] + + response = session.post( + f"{server_address}/apply-template", + json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]} + ) + response.raise_for_status() + prompt: str = json.loads(response.text)["prompt"] + response = session.post( + f"{server_address}/tokenize", + json={"content": prompt, "add_special": True} + ) + response.raise_for_status() + tokens: list[str] = json.loads(response.text)["tokens"] + return len(tokens) + + +def send_prompt(data: dict) -> tuple[float, list[float]]: + session = data["session"] + server_address: str = data["server_address"] + + t_submit = time() + if data["external_server"]: + json_data: dict = { + "prompt": data["prompt"], "ignore_eos": True, + "seed": data["seed"], "max_tokens": data["n_predict"], "stream": True} + response = session.post(f"{server_address}/v1/completions", json=json_data, stream=True) + elif data["synthetic_prompt"]: + json_data: dict = { + "prompt": data["prompt"], "ignore_eos": True, "cache_prompt": False, + "seed": data["seed"], "n_predict": data["n_predict"], "stream": True} + response = session.post(f"{server_address}/completion", json=json_data, stream=True) + else: + response = session.post( + f"{server_address}/apply-template", + json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]} + ) + response.raise_for_status() + prompt: str = json.loads(response.text)["prompt"] + + json_data: dict = {"prompt": prompt, "seed": data["seed"], "n_predict": data["n_predict"], "stream": True} + response = session.post(f"{server_address}/completion", json=json_data, stream=True) + response.raise_for_status() + + lines = [] + token_arrival_times: list[float] = [] + for line in response.iter_lines(decode_unicode=False): + if not line.startswith(b"data: "): + continue + lines.append(line) + token_arrival_times.append(time()) + token_arrival_times = token_arrival_times[:-1] + if len(lines) > 1 and "timings" in json.loads(lines[-2][6:]): + token_arrival_times = token_arrival_times[:-1] + + return (t_submit, token_arrival_times) + + +def benchmark( + path_server: str, path_log: Optional[str], path_db: Optional[str], name: Optional[str], prompt_source: str, n_prompts: int, + n_predict: int, n_predict_min: int, seed_offset: int): + external_server: bool = 
path_server.startswith("http://") or path_server.startswith("https://") + if os.environ.get("LLAMA_ARG_N_PARALLEL") is None: + logger.info("LLAMA_ARG_N_PARALLEL not explicitly set, using 32") + os.environ["LLAMA_ARG_N_PARALLEL"] = "32" + + parallel: int = int(os.environ.get("LLAMA_ARG_N_PARALLEL")) # type: ignore + prompts: Union[None, list[str], list[list[int]]] = get_prompts_text(prompt_source, n_prompts) + synthetic_prompts: bool = prompts is None + prompt_n = [] + + if synthetic_prompts: + prompt_source_split: list[str] = prompt_source.split("-") + assert len(prompt_source_split) == 3 + assert prompt_source_split[0].lower() == "rng" + prompt_length_min: int = int(prompt_source_split[1]) + prompt_length_max: int = int(prompt_source_split[2]) + logger.info("Generating random prompts...") + prompt_n = get_prompt_lengths_rng(n_prompts, prompt_length_min, prompt_length_max, seed_offset) + prompts = get_prompts_rng(prompt_n) + else: + n_predict_min = n_predict + + if not external_server and os.environ.get("LLAMA_ARG_CTX_SIZE") is None: + context_per_slot: int = int(1.05 * (n_predict + (np.max(prompt_n) if synthetic_prompts else 2048))) + context_total: int = context_per_slot * parallel + os.environ["LLAMA_ARG_CTX_SIZE"] = str(context_total) + logger.info(f"LLAMA_ARG_CTX_SIZE not explicitly set, using {context_total} ({context_per_slot} per slot).") + + server: Optional[dict] = None + session = None + try: + server = get_server(path_server, path_log) + server_address: str = server["address"] + assert external_server == (server["process"] is None) + + adapter = requests.adapters.HTTPAdapter(pool_connections=parallel, pool_maxsize=parallel) # type: ignore + session = requests.Session() + session.mount("http://", adapter) + session.mount("https://", adapter) + + data: list[dict] = [] + + for i, p in enumerate(prompts): + if seed_offset >= 0: + random.seed(3 * (seed_offset + 1000 * i) + 1) + data.append({ + "session": session, "server_address": server_address, "external_server": external_server, "prompt": p, + "synthetic_prompt": synthetic_prompts, "n_predict": random.randint(n_predict_min, n_predict), + "seed": (3 * (seed_offset + 1000 * i) + 2) if seed_offset >= 0 else -1}) + + if not synthetic_prompts: + logger.info("Getting the prompt lengths...") + prompt_n = [get_prompt_length(d) for d in data] + + logger.info("Starting the benchmark...\n") + t0 = time() + results: list[tuple[float, list[float]]] = thread_map(send_prompt, data, max_workers=parallel, chunksize=1) + finally: + if server is not None and server["process"] is not None: + server["process"].terminate() + server["process"].wait() + if session is not None: + session.close() + + prompt_t = [] + token_t = [] + depth_sum: int = 0 + for pn, (t_submit, tat) in zip(prompt_n, results): + prompt_t.append(tat[0] - t_submit) + token_t += tat + n_tokens: int = len(tat) + depth_sum += n_tokens * pn + depth_sum += n_tokens * (n_tokens + 1) // 2 + assert len(token_t) > 0 + prompt_n = np.array(prompt_n, dtype=np.int64) + prompt_t = np.array(prompt_t, dtype=np.float64) + token_t = np.array(token_t, dtype=np.float64) + + token_t -= t0 + token_t_last = np.max(token_t) + + logger.info("") + logger.info(f"Benchmark duration: {token_t_last:.2f} s") + logger.info(f"Request throughput: {n_prompts / token_t_last:.2f} requests/s = {n_prompts / (token_t_last/60):.2f} requests/min") + logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens") + logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens") + logger.info(f"Average prompt latency: 
{1e3 * np.mean(prompt_t):.2f} ms")
+    logger.info(f"Average prompt speed: {np.sum(prompt_n) / np.sum(prompt_t):.2f} tokens/s")
+    logger.info(f"Total generated tokens: {token_t.shape[0]}")
+    logger.info(f"Average generation depth: {depth_sum / token_t.shape[0]:.2f} tokens")
+    logger.info(f"Average total generation speed: {token_t.shape[0] / token_t_last:.2f} tokens/s")
+    logger.info(f"Average generation speed per slot: {token_t.shape[0] / (parallel * token_t_last):.2f} tokens/s / slot")
+
+    if path_db is not None:
+        con = sqlite3.connect(path_db)
+        cursor = con.cursor()
+        cursor.execute(
+            "CREATE TABLE IF NOT EXISTS server_bench"
+            "(name TEXT, n_parallel INTEGER, prompt_source TEXT, n_prompts INTEGER, "
+            "n_predict INTEGER, n_predict_min INTEGER, seed_offset INTEGER, runtime REAL);")
+        cursor.execute(
+            "INSERT INTO server_bench VALUES (?, ?, ?, ?, ?, ?, ?, ?);",
+            [name, parallel, prompt_source, n_prompts, n_predict, n_predict_min, seed_offset, token_t_last])
+        con.commit()
+
+    plt.figure()
+    plt.scatter(prompt_n, 1e3 * prompt_t, s=10.0, marker=".", alpha=0.25)
+    plt.xlim(0, 1.05e0 * np.max(prompt_n))
+    plt.ylim(0, 1.05e3 * np.max(prompt_t))
+    plt.title(name or "")
+    plt.xlabel("Prompt length [tokens]")
+    plt.ylabel("Time to first token [ms]")
+    plt.savefig("prompt_time.png", dpi=240)
+
+    bin_max = np.ceil(token_t_last) + 1
+    plt.figure()
+    plt.hist(token_t, np.arange(0, bin_max))
+    plt.xlim(0, bin_max + 1)
+    plt.title(name or "")
+    plt.xlabel("Time [s]")
+    plt.ylabel("Num. tokens generated per second")
+    plt.savefig("gen_rate.png", dpi=240)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Tool for benchmarking the throughput of the llama.cpp HTTP server. "
+        "Results are printed to console and visualized as plots (saved to current working directory). "
+        "To pass arguments such as the model path to the server, set the corresponding environment variables (see llama-server --help). "
+        "The reported numbers are the speeds as observed by the Python script and may differ from the performance reported by the server, "
+        "particularly when the server is fast vs. the network or Python script (e.g. when serving a very small model).")
+    parser.add_argument("--path_server", type=str, default="llama-server", help="Path to the llama.cpp server binary")
+    parser.add_argument("--path_log", type=str, default="server-bench-{port}.log", help="Path to write the server log to; the {port} placeholder is replaced with the server port")
+    parser.add_argument("--path_db", type=str, default=None, help="Path to an sqlite database to store the benchmark results in")
+    parser.add_argument("--name", type=str, default=None, help="Name to label plots and database entries with")
+    parser.add_argument(
+        "--prompt_source", type=str, default="rng-1024-2048",
+        help="How to get the prompts for the benchmark, either 'mmlu' for MMLU questions or "
+        "rng-MIN-MAX for synthetic prompts with random lengths in the interval [MIN, MAX]")
+    parser.add_argument("--n_prompts", type=int, default=100, help="Number of prompts to evaluate")
+    parser.add_argument("--n_predict", type=int, default=2048, help="Max. number of tokens to predict per prompt")
+    parser.add_argument(
+        "--n_predict_min", type=int, default=1024,
+        help="Min. number of tokens to predict per prompt (supported for synthetic prompts only)")
+    parser.add_argument("--seed_offset", type=int, default=0, help="Offset for determining the seeds for pseudorandom prompt/generation lengths. "
+                        "Correlations between seeds can occur when set >= 1000. 
Negative values mean no seed.") + args = parser.parse_args() + benchmark(**vars(args)) diff --git a/llama.cpp/scripts/snapdragon/adb/llama-cli.farf b/llama.cpp/scripts/snapdragon/adb/llama-cli.farf new file mode 100644 index 0000000..de84fe8 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/llama-cli.farf @@ -0,0 +1 @@ +0xffff diff --git a/llama.cpp/scripts/snapdragon/adb/run-bench.sh b/llama.cpp/scripts/snapdragon/adb/run-bench.sh new file mode 100755 index 0000000..2750860 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/run-bench.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# + +# Basedir on device +basedir=/data/local/tmp/llama.cpp + +branch=. +[ "$B" != "" ] && branch=$B + +adbserial= +[ "$S" != "" ] && adbserial="-s $S" + +adbhost= +[ "$H" != "" ] && adbhost="-H $H" + +model="Llama-3.2-3B-Instruct-Q4_0.gguf" +[ "$M" != "" ] && model="$M" + +device="HTP0" +[ "$D" != "" ] && device="$D" + +verbose= +[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v" + +experimental= +[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E" + +profile= +[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v" + +opmask= +[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK" + +nhvx= +[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX" + +ndev= +[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV" + +hb= +[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB" + +set -x + +adb $adbserial $adbhost shell " \ + cd $basedir; \ + LD_LIBRARY_PATH=$basedir/$branch/lib \ + ADSP_LIBRARY_PATH=$basedir/$branch/lib \ + $ndev $nhvx $opmask $verbose $experimental $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \ + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ + --batch-size 128 -ngl 99 $cli_opts $@ \ +" diff --git a/llama.cpp/scripts/snapdragon/adb/run-cli.sh b/llama.cpp/scripts/snapdragon/adb/run-cli.sh new file mode 100755 index 0000000..d19d4e9 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/run-cli.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# + +# Basedir on device +basedir=/data/local/tmp/llama.cpp + +cli_opts= + +branch=. 
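+# all knobs below are taken from the environment; a hypothetical invocation:
+#   M=my-model.gguf D=HTP0 V=1 ./scripts/snapdragon/adb/run-cli.sh -p "hello"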
+[ "$B" != "" ] && branch=$B + +adbserial= +[ "$S" != "" ] && adbserial="-s $S" + +adbhost= +[ "$H" != "" ] && adbhost="-H $H" + +model="Llama-3.2-3B-Instruct-Q4_0.gguf" +[ "$M" != "" ] && model="$M" + +device="HTP0" +[ "$D" != "" ] && device="$D" + +experimental= +[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E" + +verbose= +[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v" + +sched= +[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v" + +profile= +[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v" + +opmask= +[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK" + +nhvx= +[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX" + +ndev= +[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV" + +hb= +[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB" + +set -x + +adb $adbserial $adbhost shell " \ + cd $basedir; ulimit -c unlimited; \ + LD_LIBRARY_PATH=$basedir/$branch/lib \ + ADSP_LIBRARY_PATH=$basedir/$branch/lib \ + $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb \ + ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \ + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ + --ctx-size 8192 --batch-size 128 -fa on \ + -ngl 99 --device $device $cli_opts $@ \ +" diff --git a/llama.cpp/scripts/snapdragon/adb/run-completion.sh b/llama.cpp/scripts/snapdragon/adb/run-completion.sh new file mode 100755 index 0000000..da9df11 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/run-completion.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# + +# Basedir on device +basedir=/data/local/tmp/llama.cpp + +cli_opts= + +branch=. +[ "$B" != "" ] && branch=$B + +adbserial= +[ "$S" != "" ] && adbserial="-s $S" + +adbhost= +[ "$H" != "" ] && adbhost="-H $H" + +model="Llama-3.2-3B-Instruct-Q4_0.gguf" +[ "$M" != "" ] && model="$M" + +device="HTP0" +[ "$D" != "" ] && device="$D" + +experimental= +[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E" + +verbose= +[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v" + +sched= +[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v" + +profile= +[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v" + +opmask= +[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK" + +nhvx= +[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX" + +ndev= +[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV" + +hb= +[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB" + +set -x + +adb $adbserial $adbhost shell " \ + cd $basedir; ulimit -c unlimited; \ + LD_LIBRARY_PATH=$basedir/$branch/lib \ + ADSP_LIBRARY_PATH=$basedir/$branch/lib \ + $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb \ + ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \ + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ + --ctx-size 8192 --batch-size 128 -fa on \ + -ngl 99 -no-cnv --device $device $cli_opts $@ \ +" diff --git a/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh b/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh new file mode 100755 index 0000000..fc018e7 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/run-mtmd.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# + +# Basedir on device +basedir=/data/local/tmp/llama.cpp + +cli_opts= + +branch=. 
+[ "$B" != "" ] && branch=$B + +adbserial= +[ "$S" != "" ] && adbserial="-s $S" + +adbhost= +[ "$H" != "" ] && adbhost="-H $H" + +model="gemma-3-4b-it-Q4_0.gguf" +[ "$M" != "" ] && model="$M" + +mmproj="mmproj-F16.gguf" +[ "$MMPROJ" != "" ] && mmproj="$MMPROJ" + +image= +[ "$IMG" != "" ] && image="$IMG" + +device="HTP0" +[ "$D" != "" ] && device="$D" + +verbose= +[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" + +experimental="GGML_HEXAGON_EXPERIMENTAL=1" +[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E" + +sched= +[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v" + +profile= +[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" + +opmask= +[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK" + +nhvx= +[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX" + +ndev= +[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV" + +# MTMD backend device for vision model (defaults to CPU if not set) +mtmd_backend= +[ "$MTMD_DEVICE" != "" ] && mtmd_backend="MTMD_BACKEND_DEVICE=$MTMD_DEVICE" + +set -x + +adb $adbserial $adbhost shell " \ + cd $basedir; ulimit -c unlimited; \ + LD_LIBRARY_PATH=$basedir/$branch/lib \ + ADSP_LIBRARY_PATH=$basedir/$branch/lib \ + $verbose $experimental $sched $opmask $profile $nhvx $ndev $mtmd_backend \ + ./$branch/bin/llama-mtmd-cli --no-mmap -m $basedir/../gguf/$model \ + --mmproj $basedir/../gguf/$mmproj \ + --image $basedir/../gguf/$image \ + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ + --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on \ + -ngl 99 --device $device -v $cli_opts $@ \ +" diff --git a/llama.cpp/scripts/snapdragon/adb/run-tool.sh b/llama.cpp/scripts/snapdragon/adb/run-tool.sh new file mode 100755 index 0000000..4647ede --- /dev/null +++ b/llama.cpp/scripts/snapdragon/adb/run-tool.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# + +# Basedir on device +basedir=/data/local/tmp/llama.cpp + +cli_opts= + +branch=. +[ "$B" != "" ] && branch=$B + +adbserial= +[ "$S" != "" ] && adbserial="-s $S" + +adbhost= +[ "$H" != "" ] && adbhost="-H $H" + +device="HTP0" +[ "$D" != "" ] && device="$D" + +verbose= +[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" + +experimental= +[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E" + +sched= +[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v" + +profile= +[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" + +opmask= +[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK" + +nhvx= +[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX" + +ndev= +[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV" + +hb= +[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB" + +set -x + +tool=$1; shift + +adb $adbserial $adbhost shell " \ + cd $basedir; ulimit -c unlimited; \ + LD_LIBRARY_PATH=$basedir/$branch/lib \ + ADSP_LIBRARY_PATH=$basedir/$branch/lib \ + $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb ./$branch/bin/$tool $@ \ +" diff --git a/llama.cpp/scripts/snapdragon/qdc/readme.md b/llama.cpp/scripts/snapdragon/qdc/readme.md new file mode 100644 index 0000000..b92cf24 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/qdc/readme.md @@ -0,0 +1 @@ +This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC). 
diff --git a/llama.cpp/scripts/snapdragon/qdc/requirements.txt b/llama.cpp/scripts/snapdragon/qdc/requirements.txt new file mode 100644 index 0000000..f04bd68 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/qdc/requirements.txt @@ -0,0 +1,25 @@ +Appium-Python-Client==5.2.4 +attrs==25.4.0 +certifi==2025.10.5 +exceptiongroup==1.3.0 +h11==0.16.0 +idna==3.11 +iniconfig==2.1.0 +outcome==1.3.0.post0 +packaging==25.0 +pluggy==1.6.0 +Pygments==2.19.2 +PySocks==1.7.1 +pytest==8.4.2 +pytest-dependency==0.6.0 +selenium==4.36.0 +setuptools==80.9.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +tomli==2.3.0 +trio==0.31.0 +trio-websocket==0.12.2 +typing_extensions==4.15.0 +urllib3==2.5.0 +websocket-client==1.9.0 +wsproto==1.2.0 diff --git a/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py b/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py new file mode 100644 index 0000000..651ab5b --- /dev/null +++ b/llama.cpp/scripts/snapdragon/qdc/tests/test_bench.py @@ -0,0 +1,63 @@ +import pytest +import subprocess +import sys + +tmp_path='/data/local/tmp' +pkg_path=f'{tmp_path}/llama.cpp' +lib_path=f'{pkg_path}/lib' +bin_path=f'{pkg_path}/bin' + +model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf' +cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}' + + +def run_cmd(cmd): + p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + sys.stdout.write(p.stdout) + assert(p.returncode == 0) + + +@pytest.mark.dependency() +def test_install(): + run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}']) + run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*']) + + +## Basic cli tests +def run_llama_cli(dev, opts): + prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.' 
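+    # A fixed --seed plus -no-cnv (single-shot, non-conversational mode) keeps
+    # runs reproducible across devices; run_cmd() only asserts a zero exit
+    # code, the generated text itself is not checked.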
+ opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts + run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"']) + + +@pytest.mark.dependency(depends=['test_install']) +def test_llama_cli_cpu(): + run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on') + + +@pytest.mark.dependency(depends=['test_install']) +def test_llama_cli_gpu(): + run_llama_cli('GPUOpenCL', '-fa on') + + +@pytest.mark.dependency(depends=['test_install']) +def test_llama_cli_npu(): + run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on') + + +## Basic bench tests +def run_llama_bench(dev): + run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32']) + + +@pytest.mark.dependency(depends=['test_install']) +def test_llama_bench_cpu(): + run_llama_bench('none') + + +def test_llama_bench_gpu(): + run_llama_bench('GPUOpenCL') + + +def test_llama_bench_npu(): + run_llama_bench('HTP0') diff --git a/llama.cpp/scripts/snapdragon/windows/run-bench.ps1 b/llama.cpp/scripts/snapdragon/windows/run-bench.ps1 new file mode 100644 index 0000000..21fd063 --- /dev/null +++ b/llama.cpp/scripts/snapdragon/windows/run-bench.ps1 @@ -0,0 +1,40 @@ + +#!/usr/bin/env pwsh + +# Basedir on device +$basedir=".\pkg-snapdragon" + +$cli_opts=$args + +$model="Llama-3.2-3B-Instruct-Q4_0.gguf" +if ($null -ne $env:M) { + $model=$env:M +} + +$device="HTP0" +if ($null -ne $env:D) { + $device=$env:D +} + +if ($null -ne $env:V) { + $env:GGML_HEXAGON_VERBOSE=$env:V +} + +if ($null -ne $env:OPMASK) { + $env:GGML_HEXAGON_OPMASK=$env:OPMASK +} + +if ($null -ne $env:NHVX) { + $env:GGML_HEXAGON_NHVX=$env:NHVX +} + +if ($null -ne $env:NDEV) { + $env:GGML_HEXAGON_NDEV=$env:NDEV +} + +$env:ADSP_LIBRARY_PATH="$basedir\lib" + +& "$basedir\bin\llama-bench.exe" ` + --mmap 0 -m $basedir\..\..\gguf\$model ` + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 ` + --batch-size 128 -ngl 99 --device $device $cli_opts diff --git a/llama.cpp/scripts/snapdragon/windows/run-cli.ps1 b/llama.cpp/scripts/snapdragon/windows/run-cli.ps1 new file mode 100644 index 0000000..b13161a --- /dev/null +++ b/llama.cpp/scripts/snapdragon/windows/run-cli.ps1 @@ -0,0 +1,53 @@ + +#!/usr/bin/env pwsh + +# Basedir on device +$basedir=".\pkg-snapdragon" + +$cli_opts=$args + +$model="Llama-3.2-3B-Instruct-Q4_0.gguf" +if ($null -ne $env:M) { + $model=$env:M +} + +$device="HTP0" +if ($null -ne $env:D) { + $device=$env:D +} + +if ($null -ne $env:V) { + $env:GGML_HEXAGON_VERBOSE=$env:V +} + +if ($null -ne $env:E) { + $env:GGML_HEXAGON_EXPERIMENTAL=$env:E +} + +if ($null -ne $env:SCHED) { + $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v" +} + +if ($null -ne $env:PROF) { + $env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1 +} + +if ($null -ne $env:OPMASK) { + $env:GGML_HEXAGON_OPMASK=$env:OPMASK +} + +if ($null -ne $env:NHVX) { + $env:GGML_HEXAGON_NHVX=$env:NHVX +} + +if ($null -ne $env:NDEV) { + $env:GGML_HEXAGON_NDEV=$env:NDEV +} + +$env:ADSP_LIBRARY_PATH="$basedir\lib" + +& "$basedir\bin\llama-completion.exe" ` + --no-mmap -no-cnv -m $basedir\..\..\gguf\$model ` + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 ` + --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on ` + -ngl 99 --device $device $cli_opts diff --git a/llama.cpp/scripts/snapdragon/windows/run-tool.ps1 b/llama.cpp/scripts/snapdragon/windows/run-tool.ps1 new file mode 100644 index 0000000..70094af --- /dev/null +++ b/llama.cpp/scripts/snapdragon/windows/run-tool.ps1 @@ -0,0 +1,56 @@ + +#!/usr/bin/env 
pwsh + +# Basedir on device +$basedir=".\pkg-snapdragon" + +if ($args.Count -eq 0) { + Write-Host "No arguments provided. Expected the tool and arguments to run." + exit -1 +} + +$tool=$args[0] +$cli_opts=@() + +if ($args.Count -gt 1) { + $cli_opts=$args[1..($args.Count - 1)] +} + +$device="HTP0" +if ($null -ne $env:D) { + $device=$env:D +} + +if ($null -ne $env:V) { + $env:GGML_HEXAGON_VERBOSE=$env:V +} + +if ($null -ne $env:E) { + $env:GGML_HEXAGON_EXPERIMENTAL=$env:E +} + +if ($null -ne $env:SCHED) { + $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v" +} + +if ($null -ne $env:PROF) { + $env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1 +} + +if ($null -ne $env:OPMASK) { + $env:GGML_HEXAGON_OPMASK=$env:OPMASK +} + +if ($null -ne $env:NHVX) { + $env:GGML_HEXAGON_NHVX=$env:NHVX +} + +if ($null -ne $env:NDEV) { + $env:GGML_HEXAGON_NDEV=$env:NDEV +} + +$env:ADSP_LIBRARY_PATH="$basedir\lib" + +& "$basedir\bin\$tool" ` + $cli_opts diff --git a/llama.cpp/scripts/snapdragon/windows/setup-build.ps1 b/llama.cpp/scripts/snapdragon/windows/setup-build.ps1 new file mode 100644 index 0000000..0f3244c --- /dev/null +++ b/llama.cpp/scripts/snapdragon/windows/setup-build.ps1 @@ -0,0 +1,105 @@ +# Running as Administrator is NOT strictly necessary for User-scope env vars, +# but is recommended for creating directories in the C:\ root if permissions are restricted. + +$ErrorActionPreference = "Stop" + +# --- Configuration --- +$BaseDir = "C:\Qualcomm" + +# SDK 1: Hexagon +$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz" +$HexagonParent = Join-Path $BaseDir "Hexagon_SDK" +$HexagonSdkVersion = "6.4.0.2" +$HexagonToolsVersion = "19.0.04" +$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion +$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion" + +# SDK 2: OpenCL +$OpenCLUrl = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz" +$OpenCLParent = Join-Path $BaseDir "OpenCL_SDK" +$OpenCLVersion = "2.3.2" +$OpenCLTarget = Join-Path $OpenCLParent $OpenCLVersion + +# --- Helper Function --- +function Install-QualcommSDK { + param ( + [string]$Url, + [string]$ParentDir, + [string]$TargetDir, + [string]$Name + ) + + # 1. Create Parent Directory + if (-not (Test-Path -Path $ParentDir)) { + Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan + New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null + } + + # 2. Check for Specific Version Directory + if (Test-Path -Path $TargetDir) { + Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green + } + else { + Write-Host "$Name not found. Preparing to download..." -ForegroundColor Yellow + + # Create the target directory to extract into + New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null + + # Define temporary archive path + $TempFile = Join-Path $ParentDir "temp_sdk.tar.xz" + + try { + # Download + Write-Host "Downloading from: $Url" + Invoke-WebRequest -Uri $Url -OutFile $TempFile + + # Untar + # Note: We assume Windows includes tar.exe (Win 10 build 17063+) + Write-Host "Extracting archive to $TargetDir..." + + # We use -C to extract contents INTO the target directory created above + tar -xJvf $TempFile -C $TargetDir\.. + + Write-Host "Extraction complete." 
-ForegroundColor Green + } + catch { + Write-Error "Failed to download or extract $Name. Error: $_" + # Cleanup target dir if failed so script tries again next time + Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue + } + finally { + # Cleanup Archive + if (Test-Path $TempFile) { Remove-Item $TempFile -Force } + } + } +} + +# --- Execution --- + +# 1. Ensure Base C:\Qualcomm exists +if (-not (Test-Path $BaseDir)) { + New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null +} + +# 2. Run Install Logic +Install-QualcommSDK -Url $HexagonUrl -ParentDir $HexagonParent -TargetDir $HexagonSdkTarget -Name "Hexagon SDK" +Install-QualcommSDK -Url $OpenCLUrl -ParentDir $OpenCLParent -TargetDir $OpenCLTarget -Name "OpenCL SDK" + +# --- Environment Variables --- + +Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan + +# Set OPENCL_SDK_ROOT +[System.Environment]::SetEnvironmentVariable('OPENCL_SDK_ROOT', $OpenCLTarget, [System.EnvironmentVariableTarget]::User) +$env:OPENCL_SDK_ROOT = $OpenCLTarget # Set for current session as well +Write-Host "OPENCL_SDK_ROOT set to: $OpenCLTarget" + +# Set HEXAGON_SDK_ROOT +[System.Environment]::SetEnvironmentVariable('HEXAGON_SDK_ROOT', $HexagonSdkTarget, [System.EnvironmentVariableTarget]::User) +$env:HEXAGON_SDK_ROOT = $HexagonSdkTarget # Set for current session as well +Write-Host "HEXAGON_SDK_ROOT set to: $HexagonSdkTarget" + +# Set HEXAGON_TOOLS_ROOT +[System.Environment]::SetEnvironmentVariable('HEXAGON_TOOLS_ROOT', $HexagonToolsTarget, [System.EnvironmentVariableTarget]::User) +$env:HEXAGON_TOOLS_ROOT = $HexagonToolsTarget # Set for current session as well +Write-Host "HEXAGON_TOOLS_ROOT set to: $HexagonToolsTarget" diff --git a/llama.cpp/scripts/sync-ggml-am.sh b/llama.cpp/scripts/sync-ggml-am.sh new file mode 100755 index 0000000..826c560 --- /dev/null +++ b/llama.cpp/scripts/sync-ggml-am.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +# +# Synchronize ggml changes to llama.cpp +# +# Usage: +# +# $ cd /path/to/llama.cpp +# $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2... -C 3 +# + +set -e + +sd=$(dirname $0) +cd $sd/../ + +SRC_LLAMA=$(pwd) +SRC_GGML=$(cd ../ggml; pwd) + +if [ ! -d $SRC_GGML ]; then + echo "ggml not found at $SRC_GGML" + exit 1 +fi + +lc=$(cat $SRC_LLAMA/scripts/sync-ggml.last) +echo "Syncing ggml changes since commit $lc" + +to_skip="" + +# context for git patches in number of lines +ctx="8" + +while [ "$1" != "" ]; do + case $1 in + -skip ) + shift + to_skip=$1 + ;; + -C ) + shift + ctx=$1 + ;; + esac + shift +done + +cd $SRC_GGML + +git log --oneline $lc..HEAD +git log --oneline $lc..HEAD --reverse | grep -v "(llama/[0-9]*)" | cut -d' ' -f1 > $SRC_LLAMA/ggml-commits + +if [ ! 
-s $SRC_LLAMA/ggml-commits ]; then + rm -v $SRC_LLAMA/ggml-commits + echo "No new commits" + exit 0 +fi + +if [ -f $SRC_LLAMA/ggml-src.patch ]; then + rm -v $SRC_LLAMA/ggml-src.patch +fi + +while read c; do + if [ -n "$to_skip" ]; then + if [[ $to_skip == *"$c"* ]]; then + echo "Skipping $c" + continue + fi + fi + + git format-patch -U${ctx} -k $c~1..$c --stdout -- \ + CMakeLists.txt \ + src/CMakeLists.txt \ + cmake/BuildTypes.cmake \ + cmake/GitVars.cmake \ + cmake/common.cmake \ + cmake/ggml-config.cmake.in \ + src/ggml-cpu/cmake/FindSIMD.cmake \ + src/ggml* \ + include/ggml*.h \ + include/gguf*.h \ + tests/test-opt.cpp \ + tests/test-quantize-fns.cpp \ + tests/test-quantize-perf.cpp \ + tests/test-backend-ops.cpp \ + LICENSE \ + scripts/gen-authors.sh \ + >> $SRC_LLAMA/ggml-src.patch +done < $SRC_LLAMA/ggml-commits + +rm -v $SRC_LLAMA/ggml-commits + +# delete files if empty +if [ ! -s $SRC_LLAMA/ggml-src.patch ]; then + rm -v $SRC_LLAMA/ggml-src.patch +fi + +cd $SRC_LLAMA + +if [ -f $SRC_LLAMA/ggml-src.patch ]; then + # replace PR numbers + # + # Subject: some text (#1234) + # Subject: some text (ggml/1234) + cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp + mv ggml-src.patch.tmp ggml-src.patch + + cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp + mv ggml-src.patch.tmp ggml-src.patch + + # replace filenames: + # + # CMakelists.txt -> ggml/CMakeLists.txt + # src/CMakeLists.txt -> ggml/src/CMakeLists.txt + + # cmake/BuildTypes.cmake -> ggml/cmake/BuildTypes.cmake + # cmake/GitVars.cmake -> ggml/cmake/GitVars.cmake + # cmake/common.cmake -> ggml/cmake/common.cmake + # cmake/ggml-config.cmake.in -> ggml/cmake/ggml-config.cmake.in + # src/ggml-cpu/cmake/FindSIMD.cmake -> ggml/src/ggml-cpu/cmake/FindSIMD.cmake + # + # src/ggml* -> ggml/src/ggml* + # + # include/ggml*.h -> ggml/include/ggml*.h + # include/gguf*.h -> ggml/include/gguf*.h + # + # tests/test*.cpp -> tests/ + # + # LICENSE -> LICENSE + # scripts/gen-authors.sh -> scripts/gen-authors.sh + + cat ggml-src.patch | sed -E \ + -e 's/([[:space:]]| [ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \ + -e 's/([[:space:]]| [ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \ + -e 's/([[:space:]]| [ab]\/)cmake\/BuildTypes.cmake/\1ggml\/cmake\/BuildTypes.cmake/g' \ + -e 's/([[:space:]]| [ab]\/)cmake\/GitVars.cmake/\1ggml\/cmake\/GitVars.cmake/g' \ + -e 's/([[:space:]]| [ab]\/)cmake\/common.cmake/\1ggml\/cmake\/common.cmake/g' \ + -e 's/([[:space:]]| [ab]\/)cmake\/ggml-config.cmake.in/\1ggml\/cmake\/ggml-config.cmake.in/g' \ + -e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\/cmake\/FindSIMD.cmake/\1ggml\/src\/ggml-cpu\/cmake\/FindSIMD.cmake/g' \ + -e 's/([[:space:]]| [ab]\/)src\/ggml(.*)/\1ggml\/src\/ggml\2/g' \ + -e 's/([[:space:]]| [ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \ + -e 's/([[:space:]]| [ab]\/)include\/gguf(.*)\.h/\1ggml\/include\/gguf\2.h/g' \ + -e 's/([[:space:]]| [ab]\/)tests\/(.*)\.cpp/\1tests\/\2.cpp/g' \ + -e 's/([[:space:]]| [ab]\/)LICENSE/\1LICENSE/g' \ + -e 's/([[:space:]]| [ab]\/)scripts\/gen-authors\.sh/\1scripts\/gen-authors.sh/g' \ + > ggml-src.patch.tmp + mv ggml-src.patch.tmp ggml-src.patch + + git am -C${ctx} ggml-src.patch + + rm -v $SRC_LLAMA/ggml-src.patch +fi + +# update last commit +cd $SRC_GGML +git log -1 --format=%H > $SRC_LLAMA/scripts/sync-ggml.last + +echo "Done" + +exit 0 diff --git a/llama.cpp/scripts/sync-ggml.last b/llama.cpp/scripts/sync-ggml.last new file mode 100644 index 
0000000..81e79a9 --- /dev/null +++ b/llama.cpp/scripts/sync-ggml.last @@ -0,0 +1 @@ +a8db410a252c8c8f2d120c6f2e7133ebe032f35d diff --git a/llama.cpp/scripts/sync-ggml.sh b/llama.cpp/scripts/sync-ggml.sh new file mode 100755 index 0000000..2da9b57 --- /dev/null +++ b/llama.cpp/scripts/sync-ggml.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +cp -rpv ../ggml/CMakeLists.txt ./ggml/CMakeLists.txt +cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt + +cp -rpv ../ggml/cmake/* ./ggml/cmake/ +cp -rpv ../ggml/src/ggml-cpu/cmake/* ./ggml/src/ggml-cpu/cmake/ + +cp -rpv ../ggml/src/ggml* ./ggml/src/ + +cp -rpv ../ggml/include/ggml*.h ./ggml/include/ +cp -rpv ../ggml/include/gguf*.h ./ggml/include/ + +cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp +cp -rpv ../ggml/tests/test-quantize-fns.cpp ./tests/test-quantize-fns.cpp +cp -rpv ../ggml/tests/test-quantize-perf.cpp ./tests/test-quantize-perf.cpp +cp -rpv ../ggml/tests/test-backend-ops.cpp ./tests/test-backend-ops.cpp + +cp -rpv ../LICENSE ./LICENSE +cp -rpv ../ggml/scripts/gen-authors.sh ./scripts/gen-authors.sh diff --git a/llama.cpp/scripts/sync_vendor.py b/llama.cpp/scripts/sync_vendor.py new file mode 100755 index 0000000..1ff6a9a --- /dev/null +++ b/llama.cpp/scripts/sync_vendor.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +import urllib.request + +vendor = { + "https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp", + "https://github.com/nlohmann/json/releases/latest/download/json_fwd.hpp": "vendor/nlohmann/json_fwd.hpp", + + "https://raw.githubusercontent.com/nothings/stb/refs/heads/master/stb_image.h": "vendor/stb/stb_image.h", + + # not using latest tag to avoid this issue: https://github.com/ggml-org/llama.cpp/pull/17179#discussion_r2515877926 + # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h", + "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h", + + "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h", + "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE", + + "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h", +} + +for url, filename in vendor.items(): + print(f"downloading {url} to {filename}") # noqa: NP100 + urllib.request.urlretrieve(url, filename) + + # split cpp/h files for httplib + # see: https://github.com/yhirose/cpp-httplib/blob/master/split.py + if 'httplib.h' in filename: + border = '// ----------------------------------------------------------------------------' + with open(filename, 'r') as f: + content = f.read() + header, implementation, footer = content.split(border, 2) + fname_cpp = filename.replace('.h', '.cpp') + with open(filename, 'w') as fh: + fh.write(header) + fh.write(footer) + with open(fname_cpp, 'w') as fc: + fc.write('#include "httplib.h"\n') + fc.write('namespace httplib {\n') + fc.write(implementation.replace('\ninline ', '\n')) + fc.write('} // namespace httplib\n') diff --git a/llama.cpp/scripts/tool_bench.py b/llama.cpp/scripts/tool_bench.py new file mode 100755 index 0000000..d9f5583 --- /dev/null +++ b/llama.cpp/scripts/tool_bench.py @@ -0,0 +1,379 @@ +#!/usr/bin/env uv run +''' + Simplistic tool call benchmarks for llama-server and ollama. 
+ + Essentially runs the tests at tools/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server and ollama), + and plots the results of multiple runs (from the same .jsonl file or multiple ones) as a success rate heatmap. + + Simple usage example: + + cmake -B build && cmake --build build --config Release -j -t llama-server + + export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server + export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp} + + ./scripts/tool_bench.py run --n 10 --temp -1 --temp 0 --temp 1 --temp 2 --temp 5 --llama-baseline $PWD/buildMaster/bin/llama-server --output qwen14b.jsonl --hf bartowski/Qwen2.5-14B-Instruct-GGUF:Q4_K_L + ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 1.5B Q4_K_M" --output qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF --ollama qwen2.5:1.5b-instruct-q4_K_M + ./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 Coder 7B Q4_K_M" --output qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF --ollama qwen2.5-coder:7b + + ./scripts/tool_bench.py plot *.jsonl # Opens window w/ heatmap + ./scripts/tool_bench.py plot qwen*.jsonl --output qwen.png # Saves heatmap to qwen.png + + (please see ./scripts/tool_bench.sh for a more complete example) +''' +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "pytest", +# "pandas", +# "matplotlib", +# "seaborn", +# "requests", +# "wget", +# "typer", +# ] +# /// +from contextlib import contextmanager +from pathlib import Path +import re +from statistics import mean, median +from typing import Annotated, Dict, List, Optional, Tuple +import atexit +import json +import logging +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import subprocess +import sys +import time +import typer + +sys.path.insert(0, Path(__file__).parent.parent.as_posix()) +if True: + from tools.server.tests.utils import ServerProcess + from tools.server.tests.unit.test_tool_call import do_test_calc_result, do_test_hello_world, do_test_weather + + +@contextmanager +def scoped_server(sp: ServerProcess): + def stop(): + nonlocal sp + if sp is not None: + sp.stop() + sp = None # type: ignore + atexit.register(stop) + yield sp + stop() + + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +app = typer.Typer() + + +@app.command() +def plot(files: List[Path], output: Optional[Path] = None, test_regex: Optional[str] = None, server_regex: Optional[str] = None): + + lines: List[Dict] = [] + for file in files: + if not file.exists(): + logger.error(f"File not found: {file}") + continue + + try: + with file.open() as f: + raw_data = f.read() + logger.info(f"Reading {file} ({len(raw_data)} bytes)") + + for line_num, line in enumerate(raw_data.split('\n'), 1): + line = line.strip() + if not line: + continue + try: + record = json.loads(line) + lines.append(record) + except json.JSONDecodeError as e: + logger.warning(f"Invalid JSON at {file}:{line_num} - {e}") + except Exception as e: + logger.error(f"Error processing {file}: {e}") + + if not lines: + raise Exception("No valid data was loaded") + + data_dict: Dict[Tuple, float] = {} + models: List[str] = [] + temps = set() + tests = set() + server_names = set() + total_counts = set() + for rec in lines: + try: + model = rec["model"] + temp = rec["temp"] + server_name = rec["server_name"] + 
test = rec["test"] + success = rec["success_ratio"] + success_count = rec["success_count"] + failure_count = rec["failure_count"] + total_count = success_count + failure_count + total_counts.add(total_count) + + if test_regex and not re.search(test_regex, test): + continue + + if server_regex and not re.search(server_regex, server_name): + continue + + data_dict[(model, temp, server_name, test)] = success + + if model not in models: + models.append(model) + temps.add(temp) + tests.add(test) + server_names.add(server_name) + + except KeyError as e: + logger.warning(f"Missing required field in record: {e}") + + if len(total_counts) > 1: + logger.warning(f"Total counts are not consistent: {total_counts}") + + # Sort the collected values + temps = list(sorted(temps, key=lambda x: x if x is not None else -1)) + tests = list(sorted(tests)) + server_names = list(sorted(server_names)) + + logger.info(f"Processed {len(lines)} lines") + logger.info(f"Found {len(data_dict)} valid data points") + logger.info(f"Models: {models}") + logger.info(f"Temperatures: {temps}") + logger.info(f"Tests: {tests}") + logger.info(f"Servers: {server_names}") + + matrix: list[list[float]] = [] + index: list[str] = [] + + all_cols = [ + (server_name, test) + for server_name in server_names + for test in tests + ] + for model in models: + for temp in temps: + index.append(f"{model} @ {temp}") + row_vals = [ + data_dict.get((model, temp, server_name, test), np.nan) + for server_name, test in all_cols + ] + matrix.append(row_vals) + + columns: list[str] = [f"{server_name}\n{test}" for server_name, test in all_cols] + + df = pd.DataFrame(matrix, index=np.array(index), columns=np.array(columns)) + + plt.figure(figsize=(12, 6)) + + sns.heatmap( + df, annot=True, cmap="RdYlGn", vmin=0.0, vmax=1.0, cbar=True, fmt=".2f", center=0.5, square=True, linewidths=0.5, + cbar_kws={"label": "Success Ratio"}, + ) + + plt.title(f"Tool Call Bench (n = {str(min(total_counts)) if len(total_counts) == 1 else f'{min(total_counts)}-{max(total_counts)}'})\nSuccess Ratios by Server & Test", pad=20) + plt.xlabel("Server & Test", labelpad=10) + plt.ylabel("Model @ Temperature", labelpad=10) + + plt.xticks(rotation=45, ha='right') + plt.yticks(rotation=0) + + plt.tight_layout() + + if output: + plt.savefig(output, dpi=300, bbox_inches='tight') + logger.info(f"Plot saved to {output}") + else: + plt.show() + + +@app.command() +def run( + output: Annotated[Path, typer.Option(help="Output JSON file")], + model: Annotated[Optional[str], typer.Option(help="Name of the model to test (server agnostic)")] = None, + hf: Annotated[Optional[str], typer.Option(help="GGUF huggingface model repo id (+ optional quant) to test w/ llama-server")] = None, + chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None, + chat_template_file: Annotated[Optional[str], typer.Option(help="Chat template file override for llama-server")] = None, + ollama: Annotated[Optional[str], typer.Option(help="Ollama model tag to test")] = None, + llama_baseline: Annotated[Optional[str], typer.Option(help="llama-server baseline binary path to use as baseline")] = None, + n: Annotated[int, typer.Option(help="Number of times to run each test")] = 10, + temp: Annotated[Optional[List[float]], typer.Option(help="Set of temperatures to test")] = None, + top_p: Annotated[Optional[float], typer.Option(help="top_p")] = None, + top_k: Annotated[Optional[int], typer.Option(help="top_k")] = None, + ctk: Annotated[Optional[str], typer.Option(help="ctk")] 
= None, + ctv: Annotated[Optional[str], typer.Option(help="ctv")] = None, + fa: Annotated[Optional[bool], typer.Option(help="fa")] = None, + seed: Annotated[Optional[int], typer.Option(help="Random seed")] = None, + port: Annotated[int, typer.Option(help="llama-server port")] = 8084, + force: Annotated[bool, typer.Option(help="Force overwrite of output file")] = False, + append: Annotated[bool, typer.Option(help="Append to output file")] = False, + + test_hello_world: Annotated[bool, typer.Option(help="Whether to run the hello world test")] = True, + test_weather: Annotated[bool, typer.Option(help="Whether to run the weather test")] = True, + test_calc_result: Annotated[bool, typer.Option(help="Whether to run the calc result test")] = False, +): + # Refuse to clobber an existing output file unless --force or --append is given (asserted below) + + n_predict = 512 # High because of DeepSeek R1 + # n_ctx = 8192 + n_ctx = 2048 + + if model is None: + if hf is not None: + model = hf.split("/")[-1] + elif ollama is not None: + model = ollama + + assert force or append or not output.exists(), f"Output file already exists: {output}; use --force to overwrite" + + with output.open('a' if append else 'w') as output_file: + + def run(server: ServerProcess, *, server_name: str, model_id: str, temp: Optional[float] = None, output_kwargs={}, request_kwargs={}): + request_kwargs = {**request_kwargs} + if temp is not None: + request_kwargs['temperature'] = temp + if top_p is not None: + request_kwargs['top_p'] = top_p + if top_k is not None: + request_kwargs['top_k'] = top_k + if seed is not None: + request_kwargs['seed'] = seed + + request_kwargs['cache_prompt'] = False + + tests = {} + if test_hello_world: + tests["hello world"] = lambda server: do_test_hello_world(server, **request_kwargs) + if test_weather: + tests["weather"] = lambda server: do_test_weather(server, **request_kwargs) + if test_calc_result: + tests["calc result"] = lambda server: do_test_calc_result(server, None, 512, **request_kwargs) + + for test_name, test in tests.items(): + success_count = 0 + failure_count = 0 + failures = [] + success_times = [] + failure_times = [] + logger.info(f"Running {test_name} ({server_name}, {model}): ") + for i in range(n): + start_time = time.time() + + def elapsed(): + return time.time() - start_time + + try: + test(server) + success_times.append(elapsed()) + success_count += 1 + logger.info('success') + except Exception as e: + logger.error(f'failure: {e}') + failure_count += 1 + failure_times.append(elapsed()) + failures.append(str(e)) + # import traceback + # traceback.print_exc() + output_file.write(json.dumps({**output_kwargs, **dict( + model=model, + server_name=server_name, + model_id=model_id, + test=test_name, + temp=temp, + top_p=top_p, + top_k=top_k, + ctk=ctk, + ctv=ctv, + seed=seed, + success_ratio=float(success_count) / n, + avg_time=mean(success_times + failure_times), + median_time=median(success_times + failure_times), + success_count=success_count, + success_times=success_times, + failure_count=failure_count, + failure_times=failure_times, + failures=list(set(failures)), + )}) + '\n') + output_file.flush() + + for t in [None] if temp is None else [t if t >= 0 else None for t in temp]: + if hf is not None: + + servers: list[Tuple[str, Optional[str]]] = [('llama-server', None)] + if llama_baseline is not None: + servers.append(('llama-server (baseline)', llama_baseline)) + + for server_name, server_path in servers: + server = ServerProcess() + server.n_ctx = n_ctx + server.n_slots = 1 + server.jinja = True + server.ctk = ctk + server.ctv = ctv + 
server.fa = "on" if fa else "off" + server.n_predict = n_predict + server.model_hf_repo = hf + server.model_hf_file = None + server.chat_template = chat_template + server.chat_template_file = chat_template_file + server.server_path = server_path + if port is not None: + server.server_port = port + # server.debug = True + + with scoped_server(server): + server.start(timeout_seconds=15 * 60) + for ignore_chat_grammar in [False]: + run( + server, + server_name=server_name, + model_id=hf, + temp=t, + output_kwargs=dict( + chat_template=chat_template, + chat_template_file=chat_template_file, + ), + request_kwargs=dict( + ignore_chat_grammar=ignore_chat_grammar, + ), + ) + + if ollama is not None: + server = ServerProcess() + server.server_port = 11434 + server.server_host = "localhost" + subprocess.check_call(["ollama", "pull", ollama]) + + with scoped_server(server): + run( + server, + server_name="ollama", + model_id=ollama, + temp=t, + output_kwargs=dict( + chat_template=None, + chat_template_file=None, + ), + request_kwargs=dict( + model=ollama, + max_tokens=n_predict, + num_ctx = n_ctx, + ), + ) + + +if __name__ == "__main__": + app() diff --git a/llama.cpp/scripts/tool_bench.sh b/llama.cpp/scripts/tool_bench.sh new file mode 100755 index 0000000..05b41d2 --- /dev/null +++ b/llama.cpp/scripts/tool_bench.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -euo pipefail + +cmake --build build -j + +export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp} +export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server + +if [ ! -x "$LLAMA_SERVER_BIN_PATH" ]; then + echo "Could not find llama-server binary at $LLAMA_SERVER_BIN_PATH" + exit 1 +fi +if [ ! -d "$LLAMA_CACHE" ]; then + echo "Could not find llama cache at $LLAMA_CACHE, please set LLAMA_CACHE explicitly." + exit 1 +fi + +export ARGS=( + --llama-baseline="$(which llama-server)" + --n 30 + --temp -1 # Leaves temperature parameter unset (use the server's default, e.g. 
0.6 for ollama) + --temp 0 + --temp 0.5 + --temp 0.75 + --temp 1 + --temp 1.5 + --temp 2 + --temp 5 + "$@" +) + +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 0.5B Q4_K_M" --output ../qwenc0.5b.jsonl --hf bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:0.5b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 1.5B Q4_K_M" --output ../qwenc1.5b.jsonl --hf bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:1.5b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 3B Q4_K_M" --output ../qwenc3b.jsonl --hf bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:3b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 7B Q4_K_M" --output ../qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:7b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 32B Q4_K_M" --output ../qwenc32b.jsonl --hf bartowski/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:32B-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 1.5B Q4_K_M" --output ../qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:1.5b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 3B Q4_K_M" --output ../qwen3b.jsonl --hf bartowski/Qwen2.5-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:3b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 7B Q4_K_M" --output ../qwen7b.jsonl --hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:7b-instruct-q4_K_M + +./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 1B Q4_K_M" --output ../llama1b.jsonl --hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M --ollama llama3.2:1b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 3B Q4_K_M" --output ../llama3b.jsonl --hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M --ollama llama3.2:3b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.1 Instruct 8B Q4_K_M" --output ../llama8b.jsonl --hf bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M --ollama llama3.1:8b-instruct-q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.3 70B Q4_K_M" --output ../llama70b.jsonl --hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M + +./scripts/tool_bench.py run ${ARGS[@]} --model "Mistral Nemo Q4_K_M" --output ../nemo.jsonl --hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M --ollama mistral-nemo:12b-instruct-2407-q4_K_M + +./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 3 Llama 3.1 8B Q4_K_M" --output ../hermes3.jsonl --hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M --ollama hermes3:8b-llama3.1-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use ) +./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 2 Pro Llama 3 8B Q4_K_M" --output ../hermes2.jsonl --hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M --ollama hermes2:8b-llama3-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use ) + +./scripts/tool_bench.py run ${ARGS[@]} --model "Functionary Small V3.2 Q4_K_M" --output ../funct3.2.jsonl --hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M +./scripts/tool_bench.py run ${ARGS[@]} --model "FireFunction V2 IQ1_M" --output ../firef2.jsonl --hf bartowski/firefunction-v2-GGUF:IQ1_M --chat-template-file <( python scripts/get_chat_template.py 
fireworks-ai/llama-3-firefunction-v2 tool_use ) + +./scripts/tool_bench.py run ${ARGS[@]} --model "Command R7B 12-2024 Q6_K_L" --output ../c4ai.jsonl --hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use ) + +./scripts/tool_bench.py run ${ARGS[@]} --model "Gemma 2 2B Q8_0" --output ../gemma2.jsonl --hf bartowski/gemma-2-2b-it-GGUF:Q8_0 +./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 4 Instruct Q4_K_M" --output ../phi4.jsonl --hf bartowski/phi-4-GGUF:Q4_K_M # --ollama phi4 +./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 3.5 Mini Instruct Q4_K_M" --output ../phi3.5.jsonl --hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M # --ollama phi3.5:3.8b-mini-instruct-q4_K_M + +# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 7B Q6_K_L" --output ../dsqw7.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-7B tool_use ) +# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 32B Q4_K_M" --output ../dsqw32.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-32B tool_use ) + + +for f in ../*.jsonl; do + ./scripts/tool_bench.py plot "$f" --output ${f%.jsonl}.png || true +done diff --git a/llama.cpp/scripts/verify-checksum-models.py b/llama.cpp/scripts/verify-checksum-models.py new file mode 100755 index 0000000..0b5b9aa --- /dev/null +++ b/llama.cpp/scripts/verify-checksum-models.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import logging +import os +import hashlib + +logger = logging.getLogger("verify-checksum-models") + + +def sha256sum(file): + block_size = 16 * 1024 * 1024 # 16 MB block size + b = bytearray(block_size) + file_hash = hashlib.sha256() + mv = memoryview(b) + with open(file, 'rb', buffering=0) as f: + while True: + n = f.readinto(mv) + if not n: + break + file_hash.update(mv[:n]) + + return file_hash.hexdigest() + + +# Define the path to the llama directory (parent folder of script directory) +llama_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) + +# Define the file with the list of hashes and filenames +hash_list_file = os.path.join(llama_path, "SHA256SUMS") + +# Check if the hash list file exists +if not os.path.exists(hash_list_file): + logger.error(f"Hash list file not found: {hash_list_file}") + exit(1) + +# Read the hash file content and split it into an array of lines +with open(hash_list_file, "r") as f: + hash_list = f.read().splitlines() + +# Create an array to store the results +results = [] + +# Loop over each line in the hash list +for line in hash_list: + # Split the line into hash and filename + hash_value, filename = line.split(" ") + + # Get the full path of the file by joining the llama path and the filename + file_path = os.path.join(llama_path, filename) + + # Informing user of the progress of the integrity check + logger.info(f"Verifying the checksum of {file_path}") + + # Check if the file exists + if os.path.exists(file_path): + # Calculate the SHA256 checksum of the file using hashlib + file_hash = sha256sum(file_path) + + # Compare the file hash with the expected hash + if file_hash == hash_value: + valid_checksum = "V" + file_missing = "" + else: + valid_checksum = "" + file_missing = "" + else: + valid_checksum = "" + file_missing = "X" + + # Add the results to the 
array + results.append({ + "filename": filename, + "valid checksum": valid_checksum, + "file missing": file_missing + }) + + +# Print column headers for results table +print("filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) # noqa: NP100 +print("-" * 80) # noqa: NP100 + +# Output the results as a table +for r in results: + print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") # noqa: NP100 diff --git a/llama.cpp/scripts/xxd.cmake b/llama.cpp/scripts/xxd.cmake new file mode 100644 index 0000000..14d2753 --- /dev/null +++ b/llama.cpp/scripts/xxd.cmake @@ -0,0 +1,16 @@ +# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}` +# Usage: cmake -DINPUT=tools/server/public/index.html -DOUTPUT=tools/server/index.html.hpp -P scripts/xxd.cmake + +SET(INPUT "" CACHE STRING "Input File") +SET(OUTPUT "" CACHE STRING "Output File") + +get_filename_component(filename "${INPUT}" NAME) +string(REGEX REPLACE "\\.|-" "_" name "${filename}") + +file(READ "${INPUT}" hex_data HEX) +string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}") + +string(LENGTH ${hex_data} hex_len) +math(EXPR len "${hex_len} / 2") + +file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")
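For illustration, under the assumption of a hypothetical two-byte input file hi.txt containing "Hi" (bytes 0x48 0x69), the script above would write the following to OUTPUT; note that '.' and '-' in the filename are mapped to '_' in the symbol name:

  # cmake -DINPUT=hi.txt -DOUTPUT=hi.txt.hpp -P scripts/xxd.cmake
  # hi.txt.hpp then contains:
  #   unsigned char hi_txt[] = {0x48,0x69,};
  #   unsigned int hi_txt_len = 2;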
