text_parser.py 750 Bytes
Newer Older
kihoon.lee's avatar
upload  
kihoon.lee committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import chardet

class TextParser:
    """Read a text file whose encoding is not known in advance."""

    @staticmethod
    def parse(file_path: str) -> str:
        """Return the decoded text content of ``file_path``.

        The raw bytes are handed to ``chardet.detect`` and the file is then
        read in text mode using the encoding it reports.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file content as a string; an empty file yields ``""``.

        Raises:
            ValueError: If no encoding could be detected, or if the detected
                encoding is unknown to Python or fails to decode the file.
        """
        # Detect the encoding from the raw bytes first.
        with open(file_path, 'rb') as file:
            raw_data = file.read()

        # There is nothing to detect in an empty file; its text is empty.
        if not raw_data:
            return ""

        encoding = chardet.detect(raw_data)['encoding']
        if not encoding:
            raise ValueError("Could not detect the encoding of the file.")

        # Re-read the file in text mode with the detected encoding.
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                return file.read()
        except (UnicodeDecodeError, LookupError) as err:
            # LookupError is raised by open() when the reported codec name
            # is not one Python knows; surface it like a decode failure.
            raise ValueError(
                f"Could not decode the file with the detected encoding: {encoding}"
            ) from err