1 files changed, 150 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/webui/src/lib/utils/pdf-processing.ts b/llama.cpp/tools/server/webui/src/lib/utils/pdf-processing.ts
new file mode 100644
index 0000000..84c456d
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/utils/pdf-processing.ts
@@ -0,0 +1,150 @@
+/**
+ * PDF processing utilities using PDF.js
+ * Handles PDF text extraction and image conversion in the browser
+ */
+
+import { browser } from '$app/environment';
+import { MimeTypeApplication, MimeTypeImage } from '$lib/enums';
+import * as pdfjs from 'pdfjs-dist';
+
+type TextContent = {
+	items: Array<{ str: string }>; // only the `str` of each item is read (see convertPDFToText)
+};
+
+if (browser) {
+	// Load the worker source as raw text and serve it from a blob URL so it is bundled inline
+	import('pdfjs-dist/build/pdf.worker.min.mjs?raw')
+		.then((workerModule) => {
+			const workerBlob = new Blob([workerModule.default], { type: 'application/javascript' });
+			pdfjs.GlobalWorkerOptions.workerSrc = URL.createObjectURL(workerBlob);
+		})
+		.catch((error: unknown) => {
+			console.warn('Failed to load PDF.js worker, PDF processing may not work', error);
+		});
+}
+
+/**
+ * Read a File fully into memory as an ArrayBuffer using FileReader
+ * @param file - The file whose bytes are read
+ * @returns Promise resolving to the file's bytes; rejects with `Failed to read file.`
+ */
+async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
+	const fileReader = new FileReader();
+
+	return new Promise<ArrayBuffer>((resolve, reject) => {
+		const fail = () => reject(new Error('Failed to read file.'));
+
+		fileReader.onerror = fail;
+		fileReader.onload = () => {
+			// A falsy result is reported the same way as a read error
+			const contents = fileReader.result;
+			if (!contents) return fail();
+			resolve(contents as ArrayBuffer);
+		};
+		fileReader.readAsArrayBuffer(file);
+	});
+}
+
+/**
+ * Extract text content from a PDF file
+ * @param file - The PDF file to process
+ * @returns Promise resolving to the text of all pages, one text item per line
+ * @throws Error when not in the browser, or when reading or parsing the PDF fails
+ */
+export async function convertPDFToText(file: File): Promise<string> {
+	if (!browser) {
+		throw new Error('PDF processing is only available in the browser');
+	}
+
+	let loadingTask: ReturnType<typeof pdfjs.getDocument> | undefined;
+	try {
+		loadingTask = pdfjs.getDocument(await getFileAsBuffer(file));
+		const pdf = await loadingTask.promise;
+		// Fetch every page's text concurrently; Promise.all preserves page order
+		const textContents: TextContent[] = await Promise.all(
+			Array.from({ length: pdf.numPages }, (_, index) =>
+				// eslint-disable-next-line @typescript-eslint/no-explicit-any
+				pdf.getPage(index + 1).then((page: any) => page.getTextContent())
+			)
+		);
+		return textContents
+			.flatMap((textContent) => textContent.items.map((item) => item.str ?? ''))
+			.join('\n');
+	} catch (error) {
+		console.error('Error converting PDF to text:', error);
+		throw new Error(
+			`Failed to convert PDF to text: ${error instanceof Error ? error.message : 'Unknown error'}`
+		);
+	} finally {
+		// Free PDF.js resources for this document; a cleanup failure must not mask the outcome
+		await loadingTask?.destroy().catch(() => undefined);
+	}
+}
+
+/**
+ * Convert PDF pages to PNG images as data URLs
+ * @param file - The PDF file to convert
+ * @param scale - Rendering scale factor (default: 1.5)
+ * @returns Promise resolving to array of PNG data URLs, one per page in page order
+ * @throws Error when not in the browser, or when reading or rendering the PDF fails
+ */
+export async function convertPDFToImage(file: File, scale: number = 1.5): Promise<string[]> {
+	if (!browser) {
+		throw new Error('PDF processing is only available in the browser');
+	}
+
+	let loadingTask: ReturnType<typeof pdfjs.getDocument> | undefined;
+
+	try {
+		loadingTask = pdfjs.getDocument(await getFileAsBuffer(file));
+		const doc = await loadingTask.promise;
+		const pages: Promise<string>[] = [];
+
+		for (let i = 1; i <= doc.numPages; i++) {
+			const page = await doc.getPage(i);
+			const viewport = page.getViewport({ scale });
+			const canvas = document.createElement('canvas');
+			const ctx = canvas.getContext('2d');
+
+			// Check for a usable context before sizing the canvas
+			if (!ctx) {
+				throw new Error('Failed to get 2D context from canvas');
+			}
+
+			canvas.width = viewport.width;
+			canvas.height = viewport.height;
+
+			// Start rendering without awaiting it so pages render concurrently
+			const task = page.render({ canvasContext: ctx, viewport, canvas });
+			pages.push(task.promise.then(() => canvas.toDataURL(MimeTypeImage.PNG)));
+		}
+
+		return await Promise.all(pages);
+	} catch (error) {
+		console.error('Error converting PDF to images:', error);
+		throw new Error(
+			`Failed to convert PDF to images: ${error instanceof Error ? error.message : 'Unknown error'}`
+		);
+	} finally {
+		// Free PDF.js resources for this document; a cleanup failure must not mask the outcome
+		await loadingTask?.destroy().catch(() => undefined);
+	}
+}
+
+/**
+ * Tell whether a file reports the PDF MIME type
+ * @param file - The file whose `type` property is inspected
+ * @returns True when `file.type` equals MimeTypeApplication.PDF
+ */
+export function isPdfFile(file: File): boolean {
+	return MimeTypeApplication.PDF === file.type;
+}
+
+/**
+ * Report whether a MIME type string is exactly the PDF type
+ * @param mimeType - The MIME type to compare against MimeTypeApplication.PDF
+ * @returns True only for an exact, case-sensitive match
+ */
+export function isApplicationMimeType(mimeType: string): boolean {
+	return MimeTypeApplication.PDF === mimeType;
+}