__init__.py 1.15 KB
Newer Older
kihoon.lee's avatar
upload  
kihoon.lee committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from .pdf_parser import PDFParser
from .ppt_parser import PPTParser
from .word_parser import WordParser
from .excel_parser import ExcelParser
from .text_parser import TextParser
from modules.ocr import ReaderForEasyOCR
import os

class FileParserFactory:
    """Create a parser object chosen by a file's extension.

    The factory stores the OCR configuration once and hands it to every
    parser class that is constructed with OCR keyword arguments.
    """

    def __init__(self, use_ocr: bool, ocr_reader: ReaderForEasyOCR = None):
        """Remember the OCR settings to forward to the parsers.

        Args:
            use_ocr: Flag passed through as ``use_ocr`` to the parsers.
            ocr_reader: Reader instance passed through as ``ocr_reader``;
                defaults to ``None``.
        """
        self.use_ocr, self.ocr_reader = use_ocr, ocr_reader

    def get_parser(self, file_path: str):
        """Return a new parser instance matching ``file_path``'s extension.

        Args:
            file_path: Path whose extension selects the parser. The
                comparison is case-insensitive.

        Returns:
            A ``PDFParser`` for ``.pdf``, ``PPTParser`` for ``.pptx``,
            ``WordParser`` for ``.docx``, ``ExcelParser`` for ``.xlsx``,
            ``.xls`` and ``.csv``, or ``TextParser`` for ``.txt``.

        Raises:
            ValueError: If the extension is none of the above.
        """
        # Lower-case only the extension part of the path.
        _, suffix = os.path.splitext(file_path)
        suffix = suffix.lower()

        # TextParser is the only parser built without OCR arguments.
        if suffix == '.txt':
            return TextParser()

        # Parsers that are constructed with the stored OCR settings.
        ocr_aware_parsers = {
            '.pdf': PDFParser,
            '.pptx': PPTParser,
            '.docx': WordParser,
            '.xlsx': ExcelParser,
            '.xls': ExcelParser,
            '.csv': ExcelParser,
        }
        parser_cls = ocr_aware_parsers.get(suffix)
        if parser_cls is None:
            raise ValueError("Unsupported file format")
        return parser_cls(use_ocr=self.use_ocr, ocr_reader=self.ocr_reader)