1#!/usr/bin/env python3
2from __future__ import annotations
3
4import logging
5import argparse
6import os
7import sys
8import numpy
9import enum
10from pathlib import Path
11from typing import Any, Optional, Tuple, Type
12import warnings
13
14import numpy as np
15from PySide6.QtWidgets import (
16 QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
17 QPushButton, QLabel, QLineEdit, QFileDialog, QTableWidget,
18 QTableWidgetItem, QComboBox, QMessageBox, QTabWidget,
19 QTextEdit, QFormLayout,
20 QHeaderView, QDialog, QDialogButtonBox
21)
22from PySide6.QtCore import Qt
23
24# Necessary to load the local gguf package
25if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
26 sys.path.insert(0, str(Path(__file__).parent.parent.parent))
27
28import gguf
29from gguf import GGUFReader, GGUFWriter, GGUFValueType, ReaderField
30from gguf.constants import TokenType, RopeScalingType, PoolingType, GGMLQuantizationType
31
32logger = logging.getLogger("gguf-editor-gui")
33
34# Map of key names to enum types for automatic enum interpretation
35KEY_TO_ENUM_TYPE = {
36 gguf.Keys.Tokenizer.TOKEN_TYPE: TokenType,
37 gguf.Keys.Rope.SCALING_TYPE: RopeScalingType,
38 gguf.Keys.LLM.POOLING_TYPE: PoolingType,
39 gguf.Keys.General.FILE_TYPE: GGMLQuantizationType,
40}
41
42# Define the tokenizer keys that should be edited together
43TOKENIZER_LINKED_KEYS = [
44 gguf.Keys.Tokenizer.LIST,
45 gguf.Keys.Tokenizer.TOKEN_TYPE,
46 gguf.Keys.Tokenizer.SCORES
47]
48
49
50class TokenizerEditorDialog(QDialog):
51 def __init__(self, tokens, token_types, scores, parent=None):
52 super().__init__(parent)
53 self.setWindowTitle("Edit Tokenizer Data")
54 self.resize(900, 600)
55
56 self.tokens = tokens.copy() if tokens else []
57 self.token_types = token_types.copy() if token_types else []
58 self.scores = scores.copy() if scores else []
59
60 # Ensure all arrays have the same length
61 max_len = max(len(self.tokens), len(self.token_types), len(self.scores))
62 if len(self.tokens) < max_len:
63 self.tokens.extend([""] * (max_len - len(self.tokens)))
64 if len(self.token_types) < max_len:
65 self.token_types.extend([0] * (max_len - len(self.token_types)))
66 if len(self.scores) < max_len:
67 self.scores.extend([0.0] * (max_len - len(self.scores)))
68
69 layout = QVBoxLayout(self)
70
71 # Add filter controls
72 filter_layout = QHBoxLayout()
73 filter_layout.addWidget(QLabel("Filter:"))
74 self.filter_edit = QLineEdit()
75 self.filter_edit.setPlaceholderText("Type to filter tokens...")
76 self.filter_edit.textChanged.connect(self.apply_filter)
77 filter_layout.addWidget(self.filter_edit)
78
79 # Add page controls
80 self.page_size = 100 # Show 100 items per page
81 self.current_page = 0
82 self.total_pages = max(1, (len(self.tokens) + self.page_size - 1) // self.page_size)
83
84 self.page_label = QLabel(f"Page 1 of {self.total_pages}")
85 filter_layout.addWidget(self.page_label)
86
87 prev_page = QPushButton("Previous")
88 prev_page.clicked.connect(self.previous_page)
89 filter_layout.addWidget(prev_page)
90
91 next_page = QPushButton("Next")
92 next_page.clicked.connect(self.next_page)
93 filter_layout.addWidget(next_page)
94
95 layout.addLayout(filter_layout)
96
97 # Tokenizer data table
98 self.tokens_table = QTableWidget()
99 self.tokens_table.setColumnCount(4)
100 self.tokens_table.setHorizontalHeaderLabels(["Index", "Token", "Type", "Score"])
101 self.tokens_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.ResizeToContents)
102 self.tokens_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch)
103 self.tokens_table.horizontalHeader().setSectionResizeMode(2, QHeaderView.ResizeMode.ResizeToContents)
104 self.tokens_table.horizontalHeader().setSectionResizeMode(3, QHeaderView.ResizeMode.ResizeToContents)
105
106 layout.addWidget(self.tokens_table)
107
108 # Controls
109 controls_layout = QHBoxLayout()
110
111 add_button = QPushButton("Add Token")
112 add_button.clicked.connect(self.add_token)
113 controls_layout.addWidget(add_button)
114
115 remove_button = QPushButton("Remove Selected")
116 remove_button.clicked.connect(self.remove_selected)
117 controls_layout.addWidget(remove_button)
118
119 controls_layout.addStretch()
120
121 layout.addLayout(controls_layout)
122
123 # Buttons
124 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
125 buttons.accepted.connect(self.accept)
126 buttons.rejected.connect(self.reject)
127 layout.addWidget(buttons)
128
129 # Initialize the filtered values
130 self.filtered_indices = list(range(len(self.tokens)))
131
132 # Load data for the first page
133 self.load_page()
134
135 def apply_filter(self):
136 """Filter the tokens based on the search text."""
137 filter_text = self.filter_edit.text().lower()
138
139 if not filter_text:
140 # No filter, show all values
141 self.filtered_indices = list(range(len(self.tokens)))
142 else:
143 # Apply filter
144 self.filtered_indices = []
145 for i, token in enumerate(self.tokens):
146 if filter_text in str(token).lower():
147 self.filtered_indices.append(i)
148
149 # Reset to first page and reload
150 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
151 self.current_page = 0
152 self.page_label.setText(f"Page 1 of {self.total_pages}")
153 self.load_page()
154
155 def previous_page(self):
156 """Go to the previous page of results."""
157 if self.current_page > 0:
158 self.current_page -= 1
159 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
160 self.load_page()
161
162 def next_page(self):
163 """Go to the next page of results."""
164 if self.current_page < self.total_pages - 1:
165 self.current_page += 1
166 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
167 self.load_page()
168
169 def load_page(self):
170 """Load the current page of tokenizer data."""
171 self.tokens_table.setRowCount(0) # Clear the table
172
173 # Calculate start and end indices for the current page
174 start_idx = self.current_page * self.page_size
175 end_idx = min(start_idx + self.page_size, len(self.filtered_indices))
176
177 # Pre-allocate rows for better performance
178 self.tokens_table.setRowCount(end_idx - start_idx)
179
180 for row, i in enumerate(range(start_idx, end_idx)):
181 orig_idx = self.filtered_indices[i]
182
183 # Index
184 index_item = QTableWidgetItem(str(orig_idx))
185 index_item.setData(Qt.ItemDataRole.UserRole, orig_idx) # Store original index
186 index_item.setFlags(index_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
187 self.tokens_table.setItem(row, 0, index_item)
188
189 # Token
190 token_item = QTableWidgetItem(str(self.tokens[orig_idx]))
191 self.tokens_table.setItem(row, 1, token_item)
192
193 # Token Type
194 token_type = self.token_types[orig_idx] if orig_idx < len(self.token_types) else 0
195 try:
196 enum_val = TokenType(token_type)
197 display_text = f"{enum_val.name} ({token_type})"
198 except (ValueError, KeyError):
199 display_text = f"Unknown ({token_type})"
200
201 type_item = QTableWidgetItem(display_text)
202 type_item.setData(Qt.ItemDataRole.UserRole, token_type)
203
204 # Make type cell editable with a double-click handler
205 type_item.setFlags(type_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
206 self.tokens_table.setItem(row, 2, type_item)
207
208 # Score
209 score = self.scores[orig_idx] if orig_idx < len(self.scores) else 0.0
210 score_item = QTableWidgetItem(str(score))
211 self.tokens_table.setItem(row, 3, score_item)
212
213 # Connect double-click handler for token type cells
214 self.tokens_table.cellDoubleClicked.connect(self.handle_cell_double_click)
215
216 def handle_cell_double_click(self, row, column):
217 """Handle double-click on a cell, specifically for token type editing."""
218 if column == 2: # Token Type column
219 orig_item = self.tokens_table.item(row, 0)
220 if orig_item:
221 orig_idx = orig_item.data(Qt.ItemDataRole.UserRole)
222 self.edit_token_type(row, orig_idx)
223
224 def edit_token_type(self, row, orig_idx):
225 """Edit a token type using a dialog with a dropdown of all enum options."""
226 current_value = self.token_types[orig_idx] if orig_idx < len(self.token_types) else 0
227
228 # Create a dialog with enum options
229 dialog = QDialog(self)
230 dialog.setWindowTitle("Select Token Type")
231 layout = QVBoxLayout(dialog)
232
233 combo = QComboBox()
234 for enum_val in TokenType:
235 combo.addItem(f"{enum_val.name} ({enum_val.value})", enum_val.value)
236
237 # Set current value
238 try:
239 if isinstance(current_value, int):
240 enum_val = TokenType(current_value)
241 combo.setCurrentText(f"{enum_val.name} ({current_value})")
242 except (ValueError, KeyError):
243 pass
244
245 layout.addWidget(combo)
246
247 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
248 buttons.accepted.connect(dialog.accept)
249 buttons.rejected.connect(dialog.reject)
250 layout.addWidget(buttons)
251
252 if dialog.exec() == QDialog.DialogCode.Accepted:
253 # Get the selected value
254 new_value = combo.currentData()
255 enum_val = TokenType(new_value)
256 display_text = f"{enum_val.name} ({new_value})"
257
258 # Update the display
259 type_item = self.tokens_table.item(row, 2)
260 if type_item:
261 type_item.setText(display_text)
262 type_item.setData(Qt.ItemDataRole.UserRole, new_value)
263
264 # Update the actual value
265 self.token_types[orig_idx] = new_value
266
267 def add_token(self):
268 """Add a new token to the end of the list."""
269 # Add to the end of the arrays
270 self.tokens.append("")
271 self.token_types.append(0) # Default to normal token
272 self.scores.append(0.0)
273
274 orig_idx = len(self.tokens) - 1
275
276 # Add to filtered indices if it matches the current filter
277 filter_text = self.filter_edit.text().lower()
278 if not filter_text or filter_text in "":
279 self.filtered_indices.append(orig_idx)
280
281 # Update pagination
282 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
283
284 # Go to the last page to show the new item
285 self.current_page = self.total_pages - 1
286 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
287
288 # Reload the page
289 self.load_page()
290
291 def remove_selected(self):
292 """Remove selected tokens from all arrays."""
293 selected_rows = []
294 for item in self.tokens_table.selectedItems():
295 row = item.row()
296 if row not in selected_rows:
297 selected_rows.append(row)
298
299 if not selected_rows:
300 return
301
302 # Get original indices in descending order to avoid index shifting
303 orig_indices = []
304 for row in selected_rows:
305 orig_item = self.tokens_table.item(row, 0)
306 if orig_item:
307 orig_indices.append(orig_item.data(Qt.ItemDataRole.UserRole))
308 orig_indices.sort(reverse=True)
309
310 # Remove from all arrays
311 for idx in orig_indices:
312 if idx < len(self.tokens):
313 del self.tokens[idx]
314 if idx < len(self.token_types):
315 del self.token_types[idx]
316 if idx < len(self.scores):
317 del self.scores[idx]
318
319 # Rebuild filtered_indices
320 self.filtered_indices = []
321 filter_text = self.filter_edit.text().lower()
322
323 for i, token in enumerate(self.tokens):
324 if not filter_text or filter_text in str(token).lower():
325 self.filtered_indices.append(i)
326
327 # Update pagination
328 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
329 self.current_page = min(self.current_page, self.total_pages - 1)
330 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
331
332 # Reload the page
333 self.load_page()
334
335 def get_data(self):
336 """Return the edited tokenizer data."""
337 return self.tokens, self.token_types, self.scores
338
339
340class ArrayEditorDialog(QDialog):
341 def __init__(self, array_values, element_type, key=None, parent=None):
342 super().__init__(parent)
343 self.setWindowTitle("Edit Array Values")
344 self.resize(700, 500)
345
346 self.array_values = array_values
347 self.element_type = element_type
348 self.key = key
349
350 # Get enum type for this array if applicable
351 self.enum_type = None
352 if key in KEY_TO_ENUM_TYPE and element_type == GGUFValueType.INT32:
353 self.enum_type = KEY_TO_ENUM_TYPE[key]
354
355 layout = QVBoxLayout(self)
356
357 # Add enum type information if applicable
358 if self.enum_type is not None:
359 enum_info_layout = QHBoxLayout()
360 enum_label = QLabel(f"Editing {self.enum_type.__name__} values:")
361 enum_info_layout.addWidget(enum_label)
362
363 # Add a legend for the enum values
364 enum_values = ", ".join([f"{e.name}={e.value}" for e in self.enum_type])
365 enum_values_label = QLabel(f"Available values: {enum_values}")
366 enum_values_label.setWordWrap(True)
367 enum_info_layout.addWidget(enum_values_label, 1)
368
369 layout.addLayout(enum_info_layout)
370
371 # Add search/filter controls
372 filter_layout = QHBoxLayout()
373 filter_layout.addWidget(QLabel("Filter:"))
374 self.filter_edit = QLineEdit()
375 self.filter_edit.setPlaceholderText("Type to filter values...")
376 self.filter_edit.textChanged.connect(self.apply_filter)
377 filter_layout.addWidget(self.filter_edit)
378
379 # Add page controls for large arrays
380 self.page_size = 100 # Show 100 items per page
381 self.current_page = 0
382 self.total_pages = max(1, (len(array_values) + self.page_size - 1) // self.page_size)
383
384 self.page_label = QLabel(f"Page 1 of {self.total_pages}")
385 filter_layout.addWidget(self.page_label)
386
387 prev_page = QPushButton("Previous")
388 prev_page.clicked.connect(self.previous_page)
389 filter_layout.addWidget(prev_page)
390
391 next_page = QPushButton("Next")
392 next_page.clicked.connect(self.next_page)
393 filter_layout.addWidget(next_page)
394
395 layout.addLayout(filter_layout)
396
397 # Array items table
398 self.items_table = QTableWidget()
399
400 # Set up columns based on whether we have an enum type
401 if self.enum_type is not None:
402 self.items_table.setColumnCount(3)
403 self.items_table.setHorizontalHeaderLabels(["Index", "Value", "Actions"])
404 self.items_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.ResizeToContents)
405 self.items_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch)
406 self.items_table.horizontalHeader().setSectionResizeMode(2, QHeaderView.ResizeMode.ResizeToContents)
407 else:
408 self.items_table.setColumnCount(2)
409 self.items_table.setHorizontalHeaderLabels(["Index", "Value"])
410 self.items_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.ResizeToContents)
411 self.items_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch)
412
413 layout.addWidget(self.items_table)
414
415 # Controls
416 controls_layout = QHBoxLayout()
417
418 add_button = QPushButton("Add Item")
419 add_button.clicked.connect(self.add_item)
420 controls_layout.addWidget(add_button)
421
422 remove_button = QPushButton("Remove Selected")
423 remove_button.clicked.connect(self.remove_selected)
424 controls_layout.addWidget(remove_button)
425
426 # Add bulk edit button for enum arrays
427 if self.enum_type is not None:
428 bulk_edit_button = QPushButton("Bulk Edit Selected")
429 bulk_edit_button.clicked.connect(self.bulk_edit_selected)
430 controls_layout.addWidget(bulk_edit_button)
431
432 controls_layout.addStretch()
433
434 layout.addLayout(controls_layout)
435
436 # Buttons
437 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
438 buttons.accepted.connect(self.accept)
439 buttons.rejected.connect(self.reject)
440 layout.addWidget(buttons)
441
442 # Initialize the filtered values
443 self.filtered_indices = list(range(len(self.array_values)))
444
445 # Load array values for the first page
446 self.load_page()
447
448 def apply_filter(self):
449 """Filter the array values based on the search text."""
450 filter_text = self.filter_edit.text().lower()
451
452 if not filter_text:
453 # No filter, show all values
454 self.filtered_indices = list(range(len(self.array_values)))
455 else:
456 # Apply filter
457 self.filtered_indices = []
458 for i, value in enumerate(self.array_values):
459 # For enum values, search in both name and value
460 if self.enum_type is not None and isinstance(value, int):
461 try:
462 enum_val = self.enum_type(value)
463 display_text = f"{enum_val.name} ({value})".lower()
464 if filter_text in display_text:
465 self.filtered_indices.append(i)
466 except (ValueError, KeyError):
467 # If not a valid enum value, just check the raw value
468 if filter_text in str(value).lower():
469 self.filtered_indices.append(i)
470 else:
471 # For non-enum values, just check the string representation
472 if filter_text in str(value).lower():
473 self.filtered_indices.append(i)
474
475 # Reset to first page and reload
476 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
477 self.current_page = 0
478 self.page_label.setText(f"Page 1 of {self.total_pages}")
479 self.load_page()
480
481 def previous_page(self):
482 """Go to the previous page of results."""
483 if self.current_page > 0:
484 self.current_page -= 1
485 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
486 self.load_page()
487
488 def next_page(self):
489 """Go to the next page of results."""
490 if self.current_page < self.total_pages - 1:
491 self.current_page += 1
492 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
493 self.load_page()
494
495 def load_page(self):
496 """Load the current page of array values."""
497 self.items_table.setRowCount(0) # Clear the table
498
499 # Calculate start and end indices for the current page
500 start_idx = self.current_page * self.page_size
501 end_idx = min(start_idx + self.page_size, len(self.filtered_indices))
502
503 # Pre-allocate rows for better performance
504 self.items_table.setRowCount(end_idx - start_idx)
505
506 for row, i in enumerate(range(start_idx, end_idx)):
507 orig_idx = self.filtered_indices[i]
508 value = self.array_values[orig_idx]
509
510 # Index
511 index_item = QTableWidgetItem(str(orig_idx))
512 index_item.setData(Qt.ItemDataRole.UserRole, orig_idx) # Store original index
513 index_item.setFlags(index_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
514 self.items_table.setItem(row, 0, index_item)
515
516 # Value
517 if self.enum_type is not None:
518 # Display enum value and name
519 try:
520 if isinstance(value, (int, numpy.signedinteger)):
521 enum_val = self.enum_type(value)
522 display_text = f"{enum_val.name} ({value})"
523 else:
524 display_text = str(value)
525 except (ValueError, KeyError):
526 display_text = f"Unknown ({value})"
527
528 # Store the enum value in the item
529 value_item = QTableWidgetItem(display_text)
530 value_item.setData(Qt.ItemDataRole.UserRole, value)
531 value_item.setFlags(value_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
532 self.items_table.setItem(row, 1, value_item)
533
534 # Add an edit button in a separate column
535 edit_button = QPushButton("Edit")
536 edit_button.setProperty("row", row)
537 edit_button.clicked.connect(self.edit_array_enum_value)
538
539 # Create a widget to hold the button
540 button_widget = QWidget()
541 button_layout = QHBoxLayout(button_widget)
542 button_layout.setContentsMargins(2, 2, 2, 2)
543 button_layout.addWidget(edit_button)
544 button_layout.addStretch()
545
546 self.items_table.setCellWidget(row, 2, button_widget)
547 else:
548 value_item = QTableWidgetItem(str(value))
549 self.items_table.setItem(row, 1, value_item)
550
551 def edit_array_enum_value(self):
552 """Handle editing an enum value in the array editor."""
553 button = self.sender()
554 row = button.property("row")
555
556 # Get the original index from the table item
557 orig_item = self.items_table.item(row, 0)
558 new_item = self.items_table.item(row, 1)
559 if orig_item and new_item and self.enum_type and self.edit_enum_value(row, self.enum_type):
560 orig_idx = orig_item.data(Qt.ItemDataRole.UserRole)
561 new_value = new_item.data(Qt.ItemDataRole.UserRole)
562 # Update the stored value in the array
563 if isinstance(new_value, (int, float, str, bool)):
564 self.array_values[orig_idx] = new_value
565
566 def bulk_edit_selected(self):
567 """Edit multiple enum values at once."""
568 if not self.enum_type:
569 return
570
571 selected_rows = set()
572 for item in self.items_table.selectedItems():
573 selected_rows.add(item.row())
574
575 if not selected_rows:
576 QMessageBox.information(self, "No Selection", "Please select at least one row to edit.")
577 return
578
579 # Create a dialog with enum options
580 dialog = QDialog(self)
581 dialog.setWindowTitle(f"Bulk Edit {self.enum_type.__name__} Values")
582 layout = QVBoxLayout(dialog)
583
584 layout.addWidget(QLabel(f"Set {len(selected_rows)} selected items to:"))
585
586 combo = QComboBox()
587 for enum_val in self.enum_type:
588 combo.addItem(f"{enum_val.name} ({enum_val.value})", enum_val.value)
589
590 layout.addWidget(combo)
591
592 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
593 buttons.accepted.connect(dialog.accept)
594 buttons.rejected.connect(dialog.reject)
595 layout.addWidget(buttons)
596
597 if dialog.exec() == QDialog.DialogCode.Accepted:
598 # Get the selected value
599 new_value = combo.currentData()
600 enum_val = self.enum_type(new_value)
601 display_text = f"{enum_val.name} ({new_value})"
602
603 # Update all selected rows
604 for row in selected_rows:
605 orig_item = self.items_table.item(row, 0)
606 new_item = self.items_table.item(row, 1)
607 if orig_item and new_item:
608 orig_idx = orig_item.data(Qt.ItemDataRole.UserRole)
609 self.array_values[orig_idx] = new_value
610
611 # Update the display
612 new_item.setText(display_text)
613 new_item.setData(Qt.ItemDataRole.UserRole, new_value)
614
615 def add_item(self):
616 # Add to the end of the array
617 orig_idx = len(self.array_values)
618
619 # Add default value based on type
620 if self.enum_type is not None:
621 # Default to first enum value
622 default_value = list(self.enum_type)[0].value
623 self.array_values.append(default_value)
624 else:
625 if self.element_type == GGUFValueType.STRING:
626 self.array_values.append("")
627 else:
628 self.array_values.append(0)
629
630 # Add to filtered indices if it matches the current filter
631 self.filtered_indices.append(orig_idx)
632
633 # Update pagination
634 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
635
636 # Go to the last page to show the new item
637 self.current_page = self.total_pages - 1
638 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
639
640 # Reload the page
641 self.load_page()
642
643 def remove_selected(self):
644 selected_rows = []
645 for item in self.items_table.selectedItems():
646 row = item.row()
647 if row not in selected_rows:
648 selected_rows.append(row)
649
650 if not selected_rows:
651 return
652
653 # Get original indices in descending order to avoid index shifting
654 orig_indices = list()
655 for row in selected_rows:
656 orig_item = self.items_table.item(row, 0)
657 if orig_item:
658 orig_indices.append(orig_item.data(Qt.ItemDataRole.UserRole))
659 orig_indices.sort(reverse=True)
660
661 # Remove from array_values
662 for idx in orig_indices:
663 del self.array_values[idx]
664
665 # Rebuild filtered_indices
666 self.filtered_indices = []
667 filter_text = self.filter_edit.text().lower()
668
669 for i, value in enumerate(self.array_values):
670 if not filter_text:
671 self.filtered_indices.append(i)
672 else:
673 # Apply filter
674 if self.enum_type is not None and isinstance(value, int):
675 try:
676 enum_val = self.enum_type(value)
677 display_text = f"{enum_val.name} ({value})".lower()
678 if filter_text in display_text:
679 self.filtered_indices.append(i)
680 except (ValueError, KeyError):
681 if filter_text in str(value).lower():
682 self.filtered_indices.append(i)
683 else:
684 if filter_text in str(value).lower():
685 self.filtered_indices.append(i)
686
687 # Update pagination
688 self.total_pages = max(1, (len(self.filtered_indices) + self.page_size - 1) // self.page_size)
689 self.current_page = min(self.current_page, self.total_pages - 1)
690 self.page_label.setText(f"Page {self.current_page + 1} of {self.total_pages}")
691
692 # Reload the page
693 self.load_page()
694
695 def edit_enum_value(self, row: int, enum_type: Type[enum.Enum]):
696 """Edit an enum value using a dialog with a dropdown of all enum options."""
697 # Get the original index from the table item
698 orig_item = self.items_table.item(row, 0)
699 if orig_item:
700 orig_idx = orig_item.data(Qt.ItemDataRole.UserRole)
701 else:
702 return
703 current_value = self.array_values[orig_idx]
704
705 # Create a dialog with enum options
706 dialog = QDialog(self)
707 dialog.setWindowTitle(f"Select {enum_type.__name__} Value")
708 layout = QVBoxLayout(dialog)
709
710 # Add description
711 description = QLabel(f"Select a {enum_type.__name__} value:")
712 layout.addWidget(description)
713
714 # Use a combo box for quick selection
715 combo = QComboBox()
716 for enum_val in enum_type:
717 combo.addItem(f"{enum_val.name} ({enum_val.value})", enum_val.value)
718
719 # Set current value
720 try:
721 if isinstance(current_value, int):
722 enum_val = enum_type(current_value)
723 combo.setCurrentText(f"{enum_val.name} ({current_value})")
724 except (ValueError, KeyError):
725 pass
726
727 layout.addWidget(combo)
728
729 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
730 buttons.accepted.connect(dialog.accept)
731 buttons.rejected.connect(dialog.reject)
732 layout.addWidget(buttons)
733
734 if dialog.exec() == QDialog.DialogCode.Accepted:
735 # Update the value display and stored data
736 new_value = combo.currentData()
737 enum_val = enum_type(new_value)
738 display_text = f"{enum_val.name} ({new_value})"
739
740 new_item = self.items_table.item(row, 1)
741 if new_item:
742 new_item.setText(display_text)
743 new_item.setData(Qt.ItemDataRole.UserRole, new_value)
744
745 # Update the actual array value
746 self.array_values[orig_idx] = new_value
747 return True
748 return False
749
750 def get_array_values(self):
751 # The array_values list is kept up-to-date as edits are made
752 return self.array_values
753
754
755class AddMetadataDialog(QDialog):
756 def __init__(self, parent=None):
757 super().__init__(parent)
758 self.setWindowTitle("Add Metadata")
759 self.resize(400, 200)
760
761 layout = QVBoxLayout(self)
762
763 form_layout = QFormLayout()
764
765 self.key_edit = QLineEdit()
766 form_layout.addRow("Key:", self.key_edit)
767
768 self.type_combo = QComboBox()
769 for value_type in GGUFValueType:
770 if value_type != GGUFValueType.ARRAY: # Skip array type for simplicity
771 self.type_combo.addItem(value_type.name, value_type)
772 form_layout.addRow("Type:", self.type_combo)
773
774 self.value_edit = QTextEdit()
775 form_layout.addRow("Value:", self.value_edit)
776
777 layout.addLayout(form_layout)
778
779 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
780 buttons.accepted.connect(self.accept)
781 buttons.rejected.connect(self.reject)
782 layout.addWidget(buttons)
783
784 def get_data(self) -> Tuple[str, GGUFValueType, Any]:
785 key = self.key_edit.text()
786 value_type = self.type_combo.currentData()
787 value_text = self.value_edit.toPlainText()
788
789 # Convert value based on type
790 if value_type == GGUFValueType.UINT8:
791 value = np.uint8(int(value_text))
792 elif value_type == GGUFValueType.INT8:
793 value = np.int8(int(value_text))
794 elif value_type == GGUFValueType.UINT16:
795 value = np.uint16(int(value_text))
796 elif value_type == GGUFValueType.INT16:
797 value = np.int16(int(value_text))
798 elif value_type == GGUFValueType.UINT32:
799 value = np.uint32(int(value_text))
800 elif value_type == GGUFValueType.INT32:
801 value = np.int32(int(value_text))
802 elif value_type == GGUFValueType.FLOAT32:
803 value = np.float32(float(value_text))
804 elif value_type == GGUFValueType.BOOL:
805 value = value_text.lower() in ('true', 'yes', '1')
806 elif value_type == GGUFValueType.STRING:
807 value = value_text
808 else:
809 value = value_text
810
811 return key, value_type, value
812
813
814class GGUFEditorWindow(QMainWindow):
815 def __init__(self):
816 super().__init__()
817
818 self.setWindowTitle("GGUF Editor")
819 self.resize(1000, 800)
820
821 self.current_file = None
822 self.reader = None
823 self.modified = False
824 self.metadata_changes = {} # Store changes to apply when saving
825 self.metadata_to_remove = set() # Store keys to remove when saving
826 self.on_metadata_changed_is_connected = False
827
828 self.setup_ui()
829
830 def setup_ui(self):
831 central_widget = QWidget()
832 self.setCentralWidget(central_widget)
833
834 main_layout = QVBoxLayout(central_widget)
835
836 # File controls
837 file_layout = QHBoxLayout()
838
839 self.file_path_edit = QLineEdit()
840 self.file_path_edit.setReadOnly(True)
841 file_layout.addWidget(self.file_path_edit)
842
843 open_button = QPushButton("Open GGUF")
844 open_button.clicked.connect(self.open_file)
845 file_layout.addWidget(open_button)
846
847 save_button = QPushButton("Save As...")
848 save_button.clicked.connect(self.save_file)
849 file_layout.addWidget(save_button)
850
851 main_layout.addLayout(file_layout)
852
853 # Tabs for different views
854 self.tabs = QTabWidget()
855
856 # Metadata tab
857 self.metadata_tab = QWidget()
858 metadata_layout = QVBoxLayout(self.metadata_tab)
859
860 # Metadata table
861 self.metadata_table = QTableWidget()
862 self.metadata_table.setColumnCount(4)
863 self.metadata_table.setHorizontalHeaderLabels(["Key", "Type", "Value", "Actions"])
864 self.metadata_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
865 self.metadata_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.ResizeToContents)
866 self.metadata_table.horizontalHeader().setSectionResizeMode(2, QHeaderView.ResizeMode.Stretch)
867 self.metadata_table.horizontalHeader().setSectionResizeMode(3, QHeaderView.ResizeMode.ResizeToContents)
868 metadata_layout.addWidget(self.metadata_table)
869
870 # Metadata controls
871 metadata_controls = QHBoxLayout()
872
873 add_metadata_button = QPushButton("Add Metadata")
874 add_metadata_button.clicked.connect(self.add_metadata)
875 metadata_controls.addWidget(add_metadata_button)
876
877 metadata_controls.addStretch()
878
879 metadata_layout.addLayout(metadata_controls)
880
881 # Tensors tab
882 self.tensors_tab = QWidget()
883 tensors_layout = QVBoxLayout(self.tensors_tab)
884
885 self.tensors_table = QTableWidget()
886 self.tensors_table.setColumnCount(5)
887 self.tensors_table.setHorizontalHeaderLabels(["Name", "Type", "Shape", "Elements", "Size (bytes)"])
888 self.tensors_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
889 self.tensors_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.ResizeToContents)
890 self.tensors_table.horizontalHeader().setSectionResizeMode(2, QHeaderView.ResizeMode.ResizeToContents)
891 self.tensors_table.horizontalHeader().setSectionResizeMode(3, QHeaderView.ResizeMode.ResizeToContents)
892 self.tensors_table.horizontalHeader().setSectionResizeMode(4, QHeaderView.ResizeMode.ResizeToContents)
893 tensors_layout.addWidget(self.tensors_table)
894
895 # Add tabs to tab widget
896 self.tabs.addTab(self.metadata_tab, "Metadata")
897 self.tabs.addTab(self.tensors_tab, "Tensors")
898
899 main_layout.addWidget(self.tabs)
900
901 # Status bar
902 self.statusBar().showMessage("Ready")
903
904 def load_file(self, file_path):
905 """Load a GGUF file by path"""
906 try:
907 self.statusBar().showMessage(f"Loading {file_path}...")
908 QApplication.processEvents()
909
910 self.reader = GGUFReader(file_path, 'r')
911 self.current_file = file_path
912 self.file_path_edit.setText(file_path)
913
914 self.load_metadata()
915 self.load_tensors()
916
917 self.metadata_changes = {}
918 self.metadata_to_remove = set()
919 self.modified = False
920
921 self.statusBar().showMessage(f"Loaded {file_path}")
922 return True
923 except Exception as e:
924 QMessageBox.critical(self, "Error", f"Failed to open file: {str(e)}")
925 self.statusBar().showMessage("Error loading file")
926 return False
927
928 def open_file(self):
929 file_path, _ = QFileDialog.getOpenFileName(
930 self, "Open GGUF File", "", "GGUF Files (*.gguf);;All Files (*)"
931 )
932
933 if not file_path:
934 return
935
936 self.load_file(file_path)
937
938 def load_metadata(self):
939 self.metadata_table.setRowCount(0)
940
941 if not self.reader:
942 return
943
944 # Disconnect to prevent triggering during loading
945 if self.on_metadata_changed_is_connected:
946 with warnings.catch_warnings():
947 warnings.filterwarnings('ignore')
948 self.metadata_table.itemChanged.disconnect(self.on_metadata_changed)
949 self.on_metadata_changed_is_connected = False
950
951 for i, (key, field) in enumerate(self.reader.fields.items()):
952 self.metadata_table.insertRow(i)
953
954 # Key
955 key_item = QTableWidgetItem(key)
956 key_item.setFlags(key_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
957 self.metadata_table.setItem(i, 0, key_item)
958
959 # Type
960 if not field.types:
961 type_str = "N/A"
962 elif field.types[0] == GGUFValueType.ARRAY:
963 nest_count = len(field.types) - 1
964 element_type = field.types[-1].name
965 # Check if this is an enum array
966 enum_type = self.get_enum_for_key(key)
967 if enum_type is not None and field.types[-1] == GGUFValueType.INT32:
968 element_type = enum_type.__name__
969 type_str = '[' * nest_count + element_type + ']' * nest_count
970 else:
971 type_str = str(field.types[0].name)
972 # Check if this is an enum field
973 enum_type = self.get_enum_for_key(key)
974 if enum_type is not None and field.types[0] == GGUFValueType.INT32:
975 type_str = enum_type.__name__
976
977 type_item = QTableWidgetItem(type_str)
978 type_item.setFlags(type_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
979 self.metadata_table.setItem(i, 1, type_item)
980
981 # Value
982 value_str = self.format_field_value(field)
983 value_item = QTableWidgetItem(value_str)
984
985 # Make only simple values editable
986 if len(field.types) == 1 and field.types[0] != GGUFValueType.ARRAY:
987 value_item.setFlags(value_item.flags() | Qt.ItemFlag.ItemIsEditable)
988 else:
989 value_item.setFlags(value_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
990
991 self.metadata_table.setItem(i, 2, value_item)
992
993 # Actions
994 actions_widget = QWidget()
995 actions_layout = QHBoxLayout(actions_widget)
996 actions_layout.setContentsMargins(2, 2, 2, 2)
997
998 # Add Edit button for arrays and enum fields
999 if field.types and field.types[0] == GGUFValueType.ARRAY:
1000 edit_button = QPushButton("Edit")
1001 edit_button.setProperty("row", i)
1002 edit_button.setProperty("key", key)
1003 edit_button.clicked.connect(self.edit_array_metadata)
1004 actions_layout.addWidget(edit_button)
1005
1006 # Add special label for tokenizer linked fields
1007 if key in TOKENIZER_LINKED_KEYS:
1008 edit_button.setText("Edit Tokenizer")
1009 edit_button.setToolTip("Edit all tokenizer data together")
1010 elif len(field.types) == 1 and self.get_enum_for_key(key) is not None:
1011 edit_button = QPushButton("Edit")
1012 edit_button.setProperty("row", i)
1013 edit_button.setProperty("key", key)
1014 edit_button.clicked.connect(self.edit_metadata_enum)
1015 actions_layout.addWidget(edit_button)
1016
1017 remove_button = QPushButton("Remove")
1018 remove_button.setProperty("row", i)
1019 remove_button.setProperty("key", key)
1020 remove_button.clicked.connect(self.remove_metadata)
1021 actions_layout.addWidget(remove_button)
1022
1023 self.metadata_table.setCellWidget(i, 3, actions_widget)
1024
1025 # Reconnect after loading
1026 self.metadata_table.itemChanged.connect(self.on_metadata_changed)
1027 self.on_metadata_changed_is_connected = True
1028
1029 def extract_array_values(self, field: ReaderField) -> list:
1030 """Extract all values from an array field."""
1031 if not field.types or field.types[0] != GGUFValueType.ARRAY:
1032 return []
1033
1034 curr_type = field.types[1]
1035 array_values = []
1036 total_elements = len(field.data)
1037
1038 if curr_type == GGUFValueType.STRING:
1039 for element_pos in range(total_elements):
1040 value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
1041 array_values.append(value_string)
1042 elif self.reader and curr_type in self.reader.gguf_scalar_to_np:
1043 for element_pos in range(total_elements):
1044 array_values.append(field.parts[-1 - (total_elements - element_pos - 1)][0])
1045
1046 return array_values
1047
1048 def get_enum_for_key(self, key: str) -> Optional[Type[enum.Enum]]:
1049 """Get the enum type for a given key if it exists."""
1050 return KEY_TO_ENUM_TYPE.get(key)
1051
1052 def format_enum_value(self, value: Any, enum_type: Type[enum.Enum]) -> str:
1053 """Format a value as an enum if possible."""
1054 try:
1055 if isinstance(value, (int, str)):
1056 enum_value = enum_type(value)
1057 return f"{enum_value.name} ({value})"
1058 except (ValueError, KeyError):
1059 pass
1060 return str(value)
1061
1062 def format_field_value(self, field: ReaderField) -> str:
1063 if not field.types:
1064 return "N/A"
1065
1066 if len(field.types) == 1:
1067 curr_type = field.types[0]
1068 if curr_type == GGUFValueType.STRING:
1069 return str(bytes(field.parts[-1]), encoding='utf-8')
1070 elif self.reader and curr_type in self.reader.gguf_scalar_to_np:
1071 value = field.parts[-1][0]
1072 # Check if this field has an enum type
1073 enum_type = self.get_enum_for_key(field.name)
1074 if enum_type is not None:
1075 return self.format_enum_value(value, enum_type)
1076 return str(value)
1077
1078 if field.types[0] == GGUFValueType.ARRAY:
1079 array_values = self.extract_array_values(field)
1080 render_element = min(5, len(array_values))
1081
1082 # Get enum type for this array if applicable
1083 enum_type = self.get_enum_for_key(field.name)
1084
1085 if enum_type is not None:
1086 array_elements = []
1087 for i in range(render_element):
1088 array_elements.append(self.format_enum_value(array_values[i], enum_type))
1089 else:
1090 array_elements = [str(array_values[i]) for i in range(render_element)]
1091
1092 return f"[ {', '.join(array_elements).strip()}{', ...' if len(array_values) > len(array_elements) else ''} ]"
1093
1094 return "Complex value"
1095
1096 def load_tensors(self):
1097 self.tensors_table.setRowCount(0)
1098
1099 if not self.reader:
1100 return
1101
1102 for i, tensor in enumerate(self.reader.tensors):
1103 self.tensors_table.insertRow(i)
1104
1105 # Name
1106 name_item = QTableWidgetItem(tensor.name)
1107 name_item.setFlags(name_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1108 self.tensors_table.setItem(i, 0, name_item)
1109
1110 # Type
1111 type_item = QTableWidgetItem(tensor.tensor_type.name)
1112 type_item.setFlags(type_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1113 self.tensors_table.setItem(i, 1, type_item)
1114
1115 # Shape
1116 shape_str = " ร ".join(str(d) for d in tensor.shape)
1117 shape_item = QTableWidgetItem(shape_str)
1118 shape_item.setFlags(shape_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1119 self.tensors_table.setItem(i, 2, shape_item)
1120
1121 # Elements
1122 elements_item = QTableWidgetItem(str(tensor.n_elements))
1123 elements_item.setFlags(elements_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1124 self.tensors_table.setItem(i, 3, elements_item)
1125
1126 # Size
1127 size_item = QTableWidgetItem(f"{tensor.n_bytes:,}")
1128 size_item.setFlags(size_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1129 self.tensors_table.setItem(i, 4, size_item)
1130
1131 def on_metadata_changed(self, item):
1132 if item.column() != 2: # Only handle value column changes
1133 return
1134
1135 row = item.row()
1136 orig_item = self.metadata_table.item(row, 0)
1137 key = None
1138 if orig_item:
1139 key = orig_item.text()
1140 new_value = item.text()
1141
1142 field = None
1143 if self.reader and key:
1144 field = self.reader.get_field(key)
1145 if not field or not field.types or not key:
1146 return
1147
1148 value_type = field.types[0]
1149
1150 # Check if this is an enum field
1151 enum_type = self.get_enum_for_key(key)
1152 if enum_type is not None and value_type == GGUFValueType.INT32:
1153 # Try to parse the enum value from the text
1154 try:
1155 # Check if it's a name
1156 try:
1157 enum_val = enum_type[new_value]
1158 converted_value = enum_val.value
1159 except (KeyError, AttributeError):
1160 # Check if it's a number or "NAME (value)" format
1161 if '(' in new_value and ')' in new_value:
1162 # Extract the value from "NAME (value)" format
1163 value_part = new_value.split('(')[1].split(')')[0].strip()
1164 converted_value = int(value_part)
1165 else:
1166 # Try to convert directly to int
1167 converted_value = int(new_value)
1168
1169 # Validate that it's a valid enum value
1170 enum_type(converted_value)
1171
1172 # Store the change
1173 self.metadata_changes[key] = (value_type, converted_value)
1174 self.modified = True
1175
1176 # Update display with formatted enum value
1177 formatted_value = self.format_enum_value(converted_value, enum_type)
1178 item.setText(formatted_value)
1179
1180 self.statusBar().showMessage(f"Changed {key} to {formatted_value}")
1181 return
1182 except (ValueError, KeyError) as e:
1183 QMessageBox.warning(
1184 self,
1185 f"Invalid Enum Value ({e})",
1186 f"'{new_value}' is not a valid {enum_type.__name__} value.\n"
1187 f"Valid values are: {', '.join(v.name for v in enum_type)}")
1188
1189 # Revert to original value
1190 original_value = self.format_field_value(field)
1191 item.setText(original_value)
1192 return
1193
1194 try:
1195 # Convert the string value to the appropriate type
1196 if value_type == GGUFValueType.UINT8:
1197 converted_value = np.uint8(int(new_value))
1198 elif value_type == GGUFValueType.INT8:
1199 converted_value = np.int8(int(new_value))
1200 elif value_type == GGUFValueType.UINT16:
1201 converted_value = np.uint16(int(new_value))
1202 elif value_type == GGUFValueType.INT16:
1203 converted_value = np.int16(int(new_value))
1204 elif value_type == GGUFValueType.UINT32:
1205 converted_value = np.uint32(int(new_value))
1206 elif value_type == GGUFValueType.INT32:
1207 converted_value = np.int32(int(new_value))
1208 elif value_type == GGUFValueType.FLOAT32:
1209 converted_value = np.float32(float(new_value))
1210 elif value_type == GGUFValueType.BOOL:
1211 converted_value = new_value.lower() in ('true', 'yes', '1')
1212 elif value_type == GGUFValueType.STRING:
1213 converted_value = new_value
1214 else:
1215 # Unsupported type for editing
1216 return
1217
1218 # Store the change
1219 self.metadata_changes[key] = (value_type, converted_value)
1220 self.modified = True
1221
1222 self.statusBar().showMessage(f"Changed {key} to {new_value}")
1223 except ValueError:
1224 QMessageBox.warning(self, "Invalid Value", f"The value '{new_value}' is not valid for type {value_type.name}")
1225
1226 # Revert to original value
1227 original_value = self.format_field_value(field)
1228 item.setText(original_value)
1229
1230 def remove_metadata(self):
1231 button = self.sender()
1232 key = button.property("key")
1233 row = button.property("row")
1234
1235 reply = QMessageBox.question(
1236 self, "Confirm Removal",
1237 f"Are you sure you want to remove the metadata key '{key}'?",
1238 QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No
1239 )
1240
1241 if reply == QMessageBox.StandardButton.Yes:
1242 self.metadata_table.removeRow(row)
1243 self.metadata_to_remove.add(key)
1244
1245 # If we previously had changes for this key, remove them
1246 if key in self.metadata_changes:
1247 del self.metadata_changes[key]
1248
1249 self.modified = True
1250 self.statusBar().showMessage(f"Marked {key} for removal")
1251
1252 def edit_metadata_enum(self):
1253 """Edit an enum metadata field."""
1254 button = self.sender()
1255 key = button.property("key")
1256 row = button.property("row")
1257
1258 field = None
1259 if self.reader:
1260 field = self.reader.get_field(key)
1261 if not field or not field.types:
1262 return
1263
1264 enum_type = self.get_enum_for_key(key)
1265 if enum_type is None:
1266 return
1267
1268 # Get current value
1269 current_value = field.contents()
1270
1271 # Create a dialog with enum options
1272 dialog = QDialog(self)
1273 dialog.setWindowTitle(f"Select {enum_type.__name__} Value")
1274 layout = QVBoxLayout(dialog)
1275
1276 combo = QComboBox()
1277 for enum_val in enum_type:
1278 combo.addItem(f"{enum_val.name} ({enum_val.value})", enum_val.value)
1279
1280 # Set current value
1281 try:
1282 if isinstance(current_value, (int, str)):
1283 enum_val = enum_type(current_value)
1284 combo.setCurrentText(f"{enum_val.name} ({current_value})")
1285 except (ValueError, KeyError):
1286 pass
1287
1288 layout.addWidget(combo)
1289
1290 buttons = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
1291 buttons.accepted.connect(dialog.accept)
1292 buttons.rejected.connect(dialog.reject)
1293 layout.addWidget(buttons)
1294
1295 if dialog.exec() == QDialog.DialogCode.Accepted:
1296 # Get the selected value
1297 new_value = combo.currentData()
1298 enum_val = enum_type(new_value)
1299
1300 # Store the change
1301 self.metadata_changes[key] = (field.types[0], new_value)
1302 self.modified = True
1303
1304 # Update display
1305 display_text = f"{enum_val.name} ({new_value})"
1306 target_item = self.metadata_table.item(row, 2)
1307 if target_item:
1308 target_item.setText(display_text)
1309
1310 self.statusBar().showMessage(f"Changed {key} to {display_text}")
1311
1312 def edit_array_metadata(self):
1313 button = self.sender()
1314 key = button.property("key")
1315 row = button.property("row")
1316
1317 # Check if this is one of the linked tokenizer keys
1318 if key in TOKENIZER_LINKED_KEYS:
1319 self.edit_tokenizer_metadata(key)
1320 return
1321
1322 field = None
1323 if self.reader:
1324 field = self.reader.get_field(key)
1325 if not field or not field.types or field.types[0] != GGUFValueType.ARRAY:
1326 return
1327
1328 # Get array element type
1329 element_type = field.types[1]
1330
1331 # Extract array values
1332 array_values = self.extract_array_values(field)
1333
1334 # Open array editor dialog
1335 dialog = ArrayEditorDialog(array_values, element_type, key, self)
1336 if dialog.exec() == QDialog.DialogCode.Accepted:
1337 new_values = dialog.get_array_values()
1338
1339 # Store the change
1340 self.metadata_changes[key] = (GGUFValueType.ARRAY, (element_type, new_values))
1341 self.modified = True
1342
1343 # Update display
1344 enum_type = self.get_enum_for_key(key)
1345 if enum_type is not None and element_type == GGUFValueType.INT32:
1346 value_str = f"[ {', '.join(self.format_enum_value(v, enum_type) for v in new_values[:5])}{', ...' if len(new_values) > 5 else ''} ]"
1347 else:
1348 value_str = f"[ {', '.join(str(v) for v in new_values[:5])}{', ...' if len(new_values) > 5 else ''} ]"
1349 target_item = self.metadata_table.item(row, 2)
1350 if target_item:
1351 target_item.setText(value_str)
1352
1353 self.statusBar().showMessage(f"Updated array values for {key}")
1354
1355 def edit_tokenizer_metadata(self, trigger_key):
1356 """Edit the linked tokenizer metadata arrays together."""
1357 if not self.reader:
1358 return
1359
1360 # Get all three fields
1361 tokens_field = self.reader.get_field(gguf.Keys.Tokenizer.LIST)
1362 token_types_field = self.reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
1363 scores_field = self.reader.get_field(gguf.Keys.Tokenizer.SCORES)
1364
1365 # Extract values from each field
1366 tokens = self.extract_array_values(tokens_field) if tokens_field else []
1367 token_types = self.extract_array_values(token_types_field) if token_types_field else []
1368 scores = self.extract_array_values(scores_field) if scores_field else []
1369
1370 # Apply any pending changes
1371 if gguf.Keys.Tokenizer.LIST in self.metadata_changes:
1372 _, (_, tokens) = self.metadata_changes[gguf.Keys.Tokenizer.LIST]
1373 if gguf.Keys.Tokenizer.TOKEN_TYPE in self.metadata_changes:
1374 _, (_, token_types) = self.metadata_changes[gguf.Keys.Tokenizer.TOKEN_TYPE]
1375 if gguf.Keys.Tokenizer.SCORES in self.metadata_changes:
1376 _, (_, scores) = self.metadata_changes[gguf.Keys.Tokenizer.SCORES]
1377
1378 # Open the tokenizer editor dialog
1379 dialog = TokenizerEditorDialog(tokens, token_types, scores, self)
1380 if dialog.exec() == QDialog.DialogCode.Accepted:
1381 new_tokens, new_token_types, new_scores = dialog.get_data()
1382
1383 # Store changes for all three arrays
1384 if tokens_field:
1385 self.metadata_changes[gguf.Keys.Tokenizer.LIST] = (
1386 GGUFValueType.ARRAY,
1387 (tokens_field.types[1], new_tokens)
1388 )
1389
1390 if token_types_field:
1391 self.metadata_changes[gguf.Keys.Tokenizer.TOKEN_TYPE] = (
1392 GGUFValueType.ARRAY,
1393 (token_types_field.types[1], new_token_types)
1394 )
1395
1396 if scores_field:
1397 self.metadata_changes[gguf.Keys.Tokenizer.SCORES] = (
1398 GGUFValueType.ARRAY,
1399 (scores_field.types[1], new_scores)
1400 )
1401
1402 self.modified = True
1403
1404 # Update display for all three fields
1405 self.update_tokenizer_display(gguf.Keys.Tokenizer.LIST, new_tokens)
1406 self.update_tokenizer_display(gguf.Keys.Tokenizer.TOKEN_TYPE, new_token_types)
1407 self.update_tokenizer_display(gguf.Keys.Tokenizer.SCORES, new_scores)
1408
1409 self.statusBar().showMessage("Updated tokenizer data")
1410
1411 def update_tokenizer_display(self, key, values):
1412 """Update the display of a tokenizer field in the metadata table."""
1413 for row in range(self.metadata_table.rowCount()):
1414 key_item = self.metadata_table.item(row, 0)
1415 if key_item and key_item.text() == key:
1416 value_str = f"[ {', '.join(str(v) for v in values[:5])}{', ...' if len(values) > 5 else ''} ]"
1417 value_item = self.metadata_table.item(row, 2)
1418 if value_item:
1419 value_item.setText(value_str)
1420 break
1421
1422 def add_metadata(self):
1423 dialog = AddMetadataDialog(self)
1424 if dialog.exec() == QDialog.DialogCode.Accepted:
1425 key, value_type, value = dialog.get_data()
1426
1427 if not key:
1428 QMessageBox.warning(self, "Invalid Key", "Key cannot be empty")
1429 return
1430
1431 # Check if key already exists
1432 for row in range(self.metadata_table.rowCount()):
1433 orig_item = self.metadata_table.item(row, 0)
1434 if orig_item and orig_item.text() == key:
1435 QMessageBox.warning(self, "Duplicate Key", f"Key '{key}' already exists")
1436 return
1437
1438 # Add to table
1439 row = self.metadata_table.rowCount()
1440 self.metadata_table.insertRow(row)
1441
1442 # Key
1443 key_item = QTableWidgetItem(key)
1444 key_item.setFlags(key_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1445 self.metadata_table.setItem(row, 0, key_item)
1446
1447 # Type
1448 type_item = QTableWidgetItem(value_type.name)
1449 type_item.setFlags(type_item.flags() & ~Qt.ItemFlag.ItemIsEditable)
1450 self.metadata_table.setItem(row, 1, type_item)
1451
1452 # Value
1453 value_item = QTableWidgetItem(str(value))
1454 value_item.setFlags(value_item.flags() | Qt.ItemFlag.ItemIsEditable)
1455 self.metadata_table.setItem(row, 2, value_item)
1456
1457 # Actions
1458 actions_widget = QWidget()
1459 actions_layout = QHBoxLayout(actions_widget)
1460 actions_layout.setContentsMargins(2, 2, 2, 2)
1461
1462 remove_button = QPushButton("Remove")
1463 remove_button.setProperty("row", row)
1464 remove_button.setProperty("key", key)
1465 remove_button.clicked.connect(self.remove_metadata)
1466 actions_layout.addWidget(remove_button)
1467
1468 self.metadata_table.setCellWidget(row, 3, actions_widget)
1469
1470 # Store the change
1471 self.metadata_changes[key] = (value_type, value)
1472 self.modified = True
1473
1474 self.statusBar().showMessage(f"Added new metadata key {key}")
1475
1476 def save_file(self):
1477 if not self.reader:
1478 QMessageBox.warning(self, "No File Open", "Please open a GGUF file first")
1479 return
1480
1481 if not self.modified and not self.metadata_changes and not self.metadata_to_remove:
1482 QMessageBox.information(self, "No Changes", "No changes to save")
1483 return
1484
1485 file_path, _ = QFileDialog.getSaveFileName(
1486 self, "Save GGUF File As", "", "GGUF Files (*.gguf);;All Files (*)"
1487 )
1488
1489 if not file_path:
1490 return
1491
1492 try:
1493 self.statusBar().showMessage(f"Saving to {file_path}...")
1494 QApplication.processEvents()
1495
1496 # Get architecture and endianness from the original file
1497 arch = 'unknown'
1498 field = self.reader.get_field(gguf.Keys.General.ARCHITECTURE)
1499 if field:
1500 arch = field.contents()
1501
1502 # Create writer
1503 writer = GGUFWriter(file_path, arch=arch, endianess=self.reader.endianess)
1504
1505 # Get alignment if present
1506 alignment = None
1507 field = self.reader.get_field(gguf.Keys.General.ALIGNMENT)
1508 if field:
1509 alignment = field.contents()
1510 if alignment is not None:
1511 writer.data_alignment = alignment
1512
1513 # Copy metadata with changes
1514 for field in self.reader.fields.values():
1515 # Skip virtual fields and fields written by GGUFWriter
1516 if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
1517 continue
1518
1519 # Skip fields marked for removal
1520 if field.name in self.metadata_to_remove:
1521 continue
1522
1523 # Apply changes if any
1524 sub_type = None
1525 if field.name in self.metadata_changes:
1526 value_type, value = self.metadata_changes[field.name]
1527 if value_type == GGUFValueType.ARRAY:
1528 # Handle array values
1529 sub_type, value = value
1530 else:
1531 # Copy original value
1532 value = field.contents()
1533 value_type = field.types[0]
1534 if value_type == GGUFValueType.ARRAY:
1535 sub_type = field.types[-1]
1536
1537 if value is not None:
1538 writer.add_key_value(field.name, value, value_type, sub_type=sub_type)
1539
1540 # Add new metadata
1541 for key, (value_type, value) in self.metadata_changes.items():
1542 # Skip if the key already existed (we handled it above)
1543 if self.reader.get_field(key) is not None:
1544 continue
1545
1546 sub_type = None
1547 if value_type == GGUFValueType.ARRAY:
1548 # Handle array values
1549 sub_type, value = value
1550
1551 writer.add_key_value(key, value, value_type, sub_type=sub_type)
1552
1553 # Add tensors (including data)
1554 for tensor in self.reader.tensors:
1555 writer.add_tensor(tensor.name, tensor.data, raw_shape=tensor.data.shape, raw_dtype=tensor.tensor_type, tensor_endianess=self.reader.endianess)
1556
1557 # Write header and metadata
1558 writer.open_output_file(Path(file_path))
1559 writer.write_header_to_file()
1560 writer.write_kv_data_to_file()
1561
1562 # Write tensor data using the optimized method
1563 writer.write_tensors_to_file(progress=False)
1564
1565 writer.close()
1566
1567 self.statusBar().showMessage(f"Saved to {file_path}")
1568
1569 # Ask if user wants to open the new file
1570 reply = QMessageBox.question(
1571 self, "Open Saved File",
1572 "Would you like to open the newly saved file?",
1573 QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.Yes
1574 )
1575
1576 if reply == QMessageBox.StandardButton.Yes:
1577 self.reader = GGUFReader(file_path, 'r')
1578 self.current_file = file_path
1579 self.file_path_edit.setText(file_path)
1580
1581 self.load_metadata()
1582 self.load_tensors()
1583
1584 self.metadata_changes = {}
1585 self.metadata_to_remove = set()
1586 self.modified = False
1587
1588 except Exception as e:
1589 QMessageBox.critical(self, "Error", f"Failed to save file: {str(e)}")
1590 self.statusBar().showMessage("Error saving file")
1591
1592
1593def main() -> None:
1594 parser = argparse.ArgumentParser(description="GUI GGUF Editor")
1595 parser.add_argument("model_path", nargs="?", help="path to GGUF model file to load at startup")
1596 parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
1597
1598 args = parser.parse_args()
1599
1600 logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
1601
1602 app = QApplication(sys.argv)
1603 window = GGUFEditorWindow()
1604 window.show()
1605
1606 # Load model if specified
1607 if args.model_path:
1608 if os.path.isfile(args.model_path) and args.model_path.endswith('.gguf'):
1609 window.load_file(args.model_path)
1610 else:
1611 logger.error(f"Invalid model path: {args.model_path}")
1612 QMessageBox.warning(
1613 window,
1614 "Invalid Model Path",
1615 f"The specified file does not exist or is not a GGUF file: {args.model_path}")
1616
1617 sys.exit(app.exec())
1618
1619
1620if __name__ == '__main__':
1621 main()