[ 작업 환경 ]

Centos 7.3 & Python 3.7.5

 

 

insert_store_information_with_encoding_ver1.0.1.py
0.01MB

( raw data는 용량때문에 업로드가 불가능하여 없습니다. 참고해주세요. )

 

 

# -*- codeing: utf-8 -*-
#################### 패치노트 ####################
#    2019-11-26	  ver1.0.0	파일 생성
#    2019-11-27	  ver1.0.0	완성
#    2019-11-28	  ver1.0.0	ISO txt 파일 인코딩 기능 추가
#				ISO 파일인지 물어보고, 맞다고 하면 인코딩 진행
#    2019-12-01	  ver1.0.1	'(우)' 인식 기능 강화, 이름/좌표 '/' 인식 기능 강화
#				DB insert 직전에 넣을지 물어보는 기능 추가
#				데이터 가공시 warning, error 사항에 대해 사용자에게 reporting해주는 기능 추가
##################################################

#################### 변수1 ####################
store_list_location = '매장조사애살_20191128'     #   가게 폴더들 리스트의 위치
txt_file_name = '정보.txt'      # rawdata 정보 들어있는 파일 명
##################################################

#################### 변수2 ####################
target_host = 'localhost'
target_DB = 'test_ver100'
target_table = 'store_information'
target_port = 33906
target_user = 'testuser'
target_passwd = 'testpassword'
##################################################

######################## 추가 작업 필요 여부 & 필요 작업 선택 ###########################

answer = str(input('\n[INFO] Do you need any additional work like encoding?\n Your Answer(y/n) : '))
import time
if answer == 'n':
    mode = 'original'
else:
    answer = str(input('\n[INFO] What do you need? Please press only number.\n1. Encoding from "IOS" to "utf-8".\n\n Your Answer(1) : '))
    mode = answer
    if answer != '1':
        print('Press "Ctrl + C" For Stop This Program.')
        time.sleep(9999)

import os

def replace_all(data, old, new):
    for i in range(data.count(old)):
        data = data.replace(old, new)
    return data

######################## IOS 인코딩 -> UTF-8 ###########################

if mode == '1':

    #os.chdir(store_list_location)
    store_list = os.listdir(store_list_location + '/')
    
    os.system('rm store_list.txt')
    
    # RAW DATA 읽기
    for each_store in store_list:
        each_store = replace_all(each_store, ' ', '\ ')
        each_store = replace_all(each_store, '\\\\', '\\')
        each_store = replace_all(each_store, '&', '\&')
        if each_store != 'Thumbs.db':
            with open('store_list.txt', 'a', encoding='utf8') as f:
                f.write(each_store + '\n')
    
    
    store_list=open('store_list.txt', 'r')
    for each_store in store_list:
        each_store=each_store.strip()
        print('[INFO] ' + each_store + ' Encoding Transfering "ISO-8859" To "UTF-8" Working.')
        os.system('cp ' + store_list_location + '/' + each_store + '/' + txt_file_name + ' ' + store_list_location + '/' + each_store + '/before_encode.txt')
        os.system('iconv -c -f euc-kr -t utf-8 ' + store_list_location + '/' + each_store + '/before_encode.txt > ' + store_list_location + '/' + each_store + '/' + txt_file_name)
        os.system('rm ' + store_list_location + '/'  + each_store + '/before_encode.txt')
        
    print('[INFO] Encoding Data Success.')
    
    ########################################################################

#os.chdir(store_list_location)
store_list = os.listdir(store_list_location + '/')

total_data_list = list()
DB_insert_Data = list()

# RAW DATA 읽기
for each_store in store_list:
    if each_store != 'Thumbs.db':
        print(each_store)
    
        with open(store_list_location + '/' + each_store + '/' + txt_file_name, 'r', encoding='utf-8') as raw_information:
            lines = list()
            while True:
                temp = raw_information.readline()
                if not temp:
                    break
                elif temp == '\n' or temp == ' ' or temp == '  ':
                    pass
                elif temp == 'Thumbs.db':
                    pass
                else:
                    temp = temp.replace('\n', '')
                    lines.append(temp)
        total_data_list.append(lines)
print('[INFO] Reading Data Success.')

# DATA 가공
for i, each_store in enumerate(total_data_list):
    #print('[INFO] Touching about this file : ' + str(each_store))
    if '/' in each_store[0]:
        name = each_store[0].split('/')[0]
        if ':' in each_store[0].split('/')[1] or ' - ' in each_store[0].split('/')[1] or '~' in each_store[0].split('/')[1]:
            coordiname = ''
            print('[ERROR] ' + each_store[0] + 'coordinate value something strange.')
        else:
            coordinate = each_store[0].split('/')[1]
    else:
        name = each_store[0]
        coordinate = ''
        print('[WARNING] ' + each_store[0] + 'coordinate value NULL')

    if '37.' in each_store[2] or ',127.' in each_store[1]:
        road_name_address_kor = ''
    else:
        road_name_address_kor = each_store[1]
    if ' (우)' in each_store[2] and '지번주소)' in each_store[2]:
        location_old_kor = each_store[2].split('지번주소)')[1].split(' (우) ')[0]
        postcode = each_store[2].split(' (우)')[1]
    elif ' (우)' in each_store[2] and '지번주소)' not in each_store[2] and '지번)' not in each_store[2]:
        location_old_kor = each_store[2].split(' (우)')[0]
        postcode = each_store[2].split(' (우)')[1]
    elif ' (우)' in each_store[2] and '지번)' in each_store[2]:
        location_old_kor = each_store[2].split('지번)')[1].split(' (우)')[0]
        postcode = each_store[2].split(' (우)')[1]
    elif ' (우)' in each_store[2] and '지번)' not in each_store[2]:
        location_old_kor = each_store[2].split(' (우)')[0]
        postcode = each_store[2].split(' (우) ')[1]
    elif ' (우)' not in each_store[2] and '지번주소)' in each_store[2]:
        location_old_kor = each_store[2].split('지번주소)')[1]
        postcode = ''
    elif '지번)' not in each_store[2] and '지번' in each_store[2]:
        location_old_kor = each_store[2].split('지번')[1]
        postcode = ''
    else:
        try:
            location_old_kor = each_store[2].split('지번)')[1]
            postcode = ''
            print('[WARNING] ' + each_store[0] + ' has no postcode.')
        except IndexError:
            locartion_old_kor = each_store[2]
            postcode = ''
            print('[ERROR] ' + each_store[0] + ' has no postcode and no some location.')
    try:
        call_num = each_store[3]
    except IndexError:
        call_num = ''
        print('[ERROR] ' + each_store[0] + ' has no call_number.')
    try:
        opening_hours = each_store[4]
        lines_of_openinghours = 0
        while True:
            lines_of_openinghours -= 1
            if opening_hours != each_store[lines_of_openinghours]:
                opening_hours += '\n' + each_store[lines_of_openinghours]
            else:
                break
            
    except IndexError:
        opening_hours = ''
        print('[WARNING] ' + each_store[0] + ' has no opening hours.')
    DB_insert_Data.append([call_num.strip(), name.strip(), road_name_address_kor.strip(), location_old_kor.strip(), postcode.strip(), coordinate.strip(), opening_hours.strip()])

print('[INFO] Insert_Data Setting Complete.')

answer = str(input('\n[INFO] Do you want to insert Data into DB?\n Your Answer(y/n) : '))
if answer == 'n':
    print('Press "Ctrl + C" For Stop This Program.')
    time.sleep(9999)
else:
    print('[INFO] Inserting Data Start.')

# DB 접속 & Insert
import pymysql

conn = pymysql.connect(host=target_host, port=target_port, db=target_DB, user=target_user, passwd=target_passwd, charset='utf8')
curdic = conn.cursor(pymysql.cursors.DictCursor)
insert_store_information_sql = "INSERT INTO store_information (call_num, name, road_name_address_kor, location_old_kor, postcode, coordinate, opening_hours) VALUES (%s, %s, %s, %s, %s, %s, %s)"
cur = conn.cursor()
print('[INFO] DB Connection Success.')
for each_store in range(len(total_data_list)):
    cur.execute(insert_store_information_sql, DB_insert_Data[each_store])
print('[INFO] Data Insert Success.')
conn.commit()
print('[INFO] DB Disconnection Success. Working Complete.')

 

 

 

 

<<< 문제 발생 ERROR 해결 방법 >>>

 

1. DB 컬럼의 글자수 제한으로 인한 문제

위와 같이 정상적으로 insert는 되었으나, Data truncated for column '~~~' 발생시, 데이터의 입력값이 테이블의 컬럼에서 수용하는 데이터 길이보다 길기 때문에 발생합니다. DB 테이블 자체에서 해당 필드의 최대 길이를 증가시켜주면 해결됩니다.

깔끔하게 해결되었습니다.

 

 

 

+ Recent posts