from .pdf_parser import PDFParser
from .ppt_parser import PPTParser
from .word_parser import WordParser
from .excel_parser import ExcelParser
from .text_parser import TextParser
from modules.ocr import ReaderForEasyOCR
import os


class FileParserFactory:
    """Pick a parser object for a file based on its extension.

    The factory stores the OCR settings once and hands them to every
    parser it constructs, except ``TextParser``, which is built without
    arguments.
    """

    def __init__(self, use_ocr: bool, ocr_reader: ReaderForEasyOCR = None):
        """Remember the OCR settings to forward to the parsers.

        Args:
            use_ocr: Flag passed as ``use_ocr`` to the parsers.
            ocr_reader: Reader passed as ``ocr_reader`` to the parsers;
                defaults to ``None``.
        """
        self.ocr_reader = ocr_reader
        self.use_ocr = use_ocr

    def get_parser(self, file_path: str):
        """Return a new parser instance matching ``file_path``'s extension.

        Args:
            file_path: Path (or bare file name) whose extension selects
                the parser.

        Returns:
            A ``PDFParser`` (.pdf), ``PPTParser`` (.pptx), ``WordParser``
            (.docx), ``ExcelParser`` (.xlsx, .xls, .csv) or ``TextParser``
            (.txt) instance.

        Raises:
            ValueError: If the extension is none of the above.
        """
        # Only the extension is lowercased, so matching is case-insensitive.
        _, suffix = os.path.splitext(file_path)
        suffix = suffix.lower()

        # Plain text is the one parser constructed without OCR settings.
        if suffix == '.txt':
            return TextParser()

        # Built per call so the parser names are resolved at call time.
        ocr_aware_parsers = {
            '.pdf': PDFParser,
            '.pptx': PPTParser,
            '.docx': WordParser,
            '.xlsx': ExcelParser,
            '.xls': ExcelParser,
            '.csv': ExcelParser,
        }
        parser_cls = ocr_aware_parsers.get(suffix)
        if parser_cls is None:
            raise ValueError("Unsupported file format")

        return parser_cls(use_ocr=self.use_ocr, ocr_reader=self.ocr_reader)