5.3 儲存至SQLite

這裡會延續前一小節的ezprice爬蟲的內容. 有時候, 你可能不喜歡CSV檔案, 而比較喜歡把資料存到DB裡面, 這時候就可以考慮採用以下這隻爬蟲的做法. 這隻爬蟲會把前一小節產生的csv檔案當作輸入, 並把當中的資料讀出來並且儲存到資料庫裡面. 這邊使用SQLite作為範例資料庫.

from ch5.domain.item import Item
import sqlite3
import csv


DB_NAME = 'db.sqlite'
DROP_TABLE_COMMAND = 'DROP TABLE %s'
CHECK_TABLE_COMMAND = 'SELECT name FROM sqlite_master WHERE type=\'table\' AND name=\'%s\';'
FETCH_ALL_RECORD_COMMAND = 'SELECT * FROM %s;'


def connect_db(db_file):
    return sqlite3.connect(db_file)


def execute_command(connection, sql_cmd):
    cursor = connection.cursor()
    cursor.execute(sql_cmd)
    connection.commit()


def table_exists(connection, table_name):
    cursor = connection.cursor()
    cursor.execute(CHECK_TABLE_COMMAND % table_name)
    result = cursor.fetchone()
    if result is None:
        return False
    else:
        return True


def create_table(connection, table_name):
    create_table_cmd = 'CREATE TABLE %s (id INTEGER PRIMARY KEY AUTOINCREMENT, item TEXT, price INTEGER, shop TEXT)' % table_name
    if not table_exists(connection, table_name):
        print('Table \'%s\' does not exist, creating...' % table_name)
        execute_command(connection, create_table_cmd)
        print('Table \'%s\' created.' % table_name)
    else:
        execute_command(connection, DROP_TABLE_COMMAND % table_name)
        print('Table \'%s\' already exists, initializing...' % table_name)
        execute_command(connection, create_table_cmd)
        print('Table \'%s\' created.' % table_name)


def insert_data(connection, table_name, item):
    insert_record_cmd = 'INSERT INTO %s (item, price, shop) VALUES ("%s", %d, "%s")' % (table_name, item.name, item.price, item.shop)
    execute_command(connection, insert_record_cmd)


def update_data(connection, table_name):
    update_record_cmd = 'UPDATE %s SET shop = "udn買東西2" where shop="udn買東西"' % table_name
    execute_command(connection, update_record_cmd)


def insert_bulk_record(connection, table_name, input_file):
    with open(input_file, 'r', encoding='UTF-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            insert_record_cmd = 'INSERT INTO %s (item, price, shop) VALUES ("%s", "%s", "%s")' % (table_name, row['Item'], row['Price'], row['Store'])
            execute_command(connection, insert_record_cmd)


def fetch_all_record_from_db(connection, sql_cmd):
    cursor = connection.cursor()
    cursor.execute(sql_cmd)
    rows = cursor.fetchall()
    for row in rows:
        print(row)


def main():
    connection = connect_db(DB_NAME)
    table_name = 'record'
    input_file = 'ezprice.csv'
    item = Item('嚕嚕抱枕', 999, '嚕嚕小朋友')
    try:
        create_table(connection, table_name)
        insert_data(connection, table_name, item)
        insert_bulk_record(connection, table_name, input_file)
        update_data(connection, table_name)
        fetch_all_record_from_db(connection, FETCH_ALL_RECORD_COMMAND % table_name)
        connection.close()
    except Exception as exception:
        print('Encounter some exceptions while executing DB tasks, close the connection...')
        print('Exception message: ' + exception.__str__())
        connection.close()

if __name__ == '__main__':
    main()

輸出結果:

DB內容:

原始碼點我

Last updated

Was this helpful?