summary | refs | log | tree | commit | diff
path: root/llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py')
-rwxr-xr-x llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py 76
1 files changed, 76 insertions, 0 deletions
diff --git a/llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py b/llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py
new file mode 100755
index 0000000..a286cb5
--- /dev/null
+++ b/llama.cpp/examples/model-conversion/scripts/utils/compare_tokens.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+import argparse
+import sys
+from common import compare_tokens # type: ignore
+
+
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options for the token comparison script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            falls back to ``sys.argv[1:]``, which is what the script did
            before this parameter existed.

    Returns:
        The parsed namespace with the attributes ``original``, ``converted``,
        ``suffix``, ``data_dir`` and ``verbose``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``-h/--help``.
    """
    parser = argparse.ArgumentParser(
        description='Compare tokens between two models',
        # Keep the epilog's line breaks instead of letting argparse re-wrap it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 %(prog)s pytorch-gemma-3-270m-it llamacpp-gemma-3-270m-it-bf16
 """
    )
    parser.add_argument(
        'original',
        help='Original model name'
    )
    parser.add_argument(
        'converted',
        help='Converted model name'
    )
    parser.add_argument(
        '-s', '--suffix',
        default='',
        help='Type suffix (e.g., "-embeddings")'
    )
    parser.add_argument(
        '-d', '--data-dir',
        default='data',
        help='Directory containing token files (default: data)'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Print prompts from both models'
    )
    return parser.parse_args(argv)
+
+
def main():
    """Run the token comparison and exit with a shell-friendly status.

    With ``--verbose`` the saved prompt file of each model is printed first,
    for every prompt file that exists in the data directory. The process then
    exits with status 0 when ``compare_tokens`` returns a truthy result and
    with status 1 otherwise.
    """
    cli = parse_arguments()

    if cli.verbose:
        from pathlib import Path

        token_dir = Path(cli.data_dir)

        # (label, model name) pairs in the order their prompts are shown.
        models = (("Original", cli.original), ("Converted", cli.converted))
        for label, model in models:
            prompt_path = token_dir / f"{model}{cli.suffix}-prompt.txt"
            if not prompt_path.exists():
                # A missing prompt file is silently skipped.
                continue
            print(f"\n{label} model prompt ({model}):")
            print(f" {prompt_path.read_text().strip()}")

        print()

    tokens_match = compare_tokens(
        cli.original,
        cli.converted,
        type_suffix=cli.suffix,
        output_dir=cli.data_dir,
    )

    # The exit status mirrors the comparison result so that calling shell
    # scripts can branch on success/failure.
    if tokens_match:
        sys.exit(0)
    sys.exit(1)
+
+
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()