import gradio as gr import os, pickle from text_extractor import OCRProcessor import shutil from loguru import logger class DocumentClassifier: def __init__(self): self.ocr_processor = OCRProcessor() with open('model/lr_classifier_v1.pkl', 'rb') as doc_cat_file: self.model = pickle.load(doc_cat_file) # Create temporary directories self.temp_folder = 'temp_files' self.temp_output = 'temp_output' os.makedirs(self.temp_folder, exist_ok=True) os.makedirs(self.temp_output, exist_ok=True) self.label_mapper = { 0: 'cable', 1: 'fuses', 2: 'lighting', 3: 'others' } def cleanup(self): """Clean up temporary files""" shutil.rmtree(self.temp_folder, ignore_errors=True) shutil.rmtree(self.temp_output, ignore_errors=True) def process_document(self, file): try: file_path = file.name # Perform OCR raw_text = self.ocr_processor.perform_ocr( file_path, self.temp_output ) if not raw_text: return "No text could be extracted from the document" predicted_probabilities = self.model.predict_proba([raw_text])[0] predicted_category_index = predicted_probabilities.argmax() predicted_category = self.label_mapper[predicted_category_index] confidence_score = predicted_probabilities[predicted_category_index] self.cleanup() return { 'Classification': predicted_category, 'Confidence Score': str(round(confidence_score, 2)) } except Exception as e: logger.error(f"Error processing document: {str(e)}") self.cleanup() return f"Error processing document: {str(e)}" classifier = DocumentClassifier() def classify_document(file): result = classifier.process_document(file) return result['Classification'], result['Confidence Score'] iface = gr.Interface( fn=classify_document, inputs=gr.File(label="Upload PDF or Image"), outputs=[ gr.Label(label="Classification"), gr.Label(label="Confidence Score") ], title="📄 Smart Document Classifier", description="Upload your PDF or image documents and let AI classify them automatically into categories: cable, fuses, lighting, or others.", theme=gr.themes.Citrus(), examples=[ ["examples/cyp_specs.pdf"] ], css=""" .gradio-container { font-family: 'Quicksand', sans-serif !important; } .gr-button { font-weight: 600; } """ ) if __name__ == "__main__": iface.launch(server_name="0.0.0.0", server_port=7860)