5.3 儲存至SQLite

這裡會延續前一小節的ezprice爬蟲的內容. 有時候, 你可能不喜歡CSV檔案, 而比較喜歡把資料存到DB裡面, 這時候就可以考慮採用以下這隻爬蟲的做法. 這隻爬蟲會把前一小節產生的csv檔案當作輸入, 並把當中的資料讀出來並且儲存到資料庫裡面. 這邊使用SQLite作為範例資料庫.

from ch5.domain.item import Item
import sqlite3
import csv


DB_NAME = 'db.sqlite'
DROP_TABLE_COMMAND = 'DROP TABLE %s'
CHECK_TABLE_COMMAND = 'SELECT name FROM sqlite_master WHERE type=\'table\' AND name=\'%s\';'
FETCH_ALL_RECORD_COMMAND = 'SELECT * FROM %s;'


def connect_db(db_file):
    return sqlite3.connect(db_file)


def execute_command(connection, sql_cmd):
    cursor = connection.cursor()
    cursor.execute(sql_cmd)
    connection.commit()


def table_exists(connection, table_name):
    cursor = connection.cursor()
    cursor.execute(CHECK_TABLE_COMMAND % table_name)
    result = cursor.fetchone()
    if result is None:
        return False
    else:
        return True


def create_table(connection, table_name):
    create_table_cmd = 'CREATE TABLE %s (id INTEGER PRIMARY KEY AUTOINCREMENT, item TEXT, price INTEGER, shop TEXT)' % table_name
    if not table_exists(connection, table_name):
        print('Table \'%s\' does not exist, creating...' % table_name)
        execute_command(connection, create_table_cmd)
        print('Table \'%s\' created.' % table_name)
    else:
        execute_command(connection, DROP_TABLE_COMMAND % table_name)
        print('Table \'%s\' already exists, initializing...' % table_name)
        execute_command(connection, create_table_cmd)
        print('Table \'%s\' created.' % table_name)


def insert_data(connection, table_name, item):
    insert_record_cmd = 'INSERT INTO %s (item, price, shop) VALUES ("%s", %d, "%s")' % (table_name, item.name, item.price, item.shop)
    execute_command(connection, insert_record_cmd)


def update_data(connection, table_name):
    update_record_cmd = 'UPDATE %s SET shop = "udn買東西2" where shop="udn買東西"' % table_name
    execute_command(connection, update_record_cmd)


def insert_bulk_record(connection, table_name, input_file):
    with open(input_file, 'r', encoding='UTF-8') as file:
        reader = csv.DictReader(file)
        for row in reader:
            insert_record_cmd = 'INSERT INTO %s (item, price, shop) VALUES ("%s", "%s", "%s")' % (table_name, row['Item'], row['Price'], row['Store'])
            execute_command(connection, insert_record_cmd)


def fetch_all_record_from_db(connection, sql_cmd):
    cursor = connection.cursor()
    cursor.execute(sql_cmd)
    rows = cursor.fetchall()
    for row in rows:
        print(row)


def main():
    connection = connect_db(DB_NAME)
    table_name = 'record'
    input_file = 'ezprice.csv'
    item = Item('嚕嚕抱枕', 999, '嚕嚕小朋友')
    try:
        create_table(connection, table_name)
        insert_data(connection, table_name, item)
        insert_bulk_record(connection, table_name, input_file)
        update_data(connection, table_name)
        fetch_all_record_from_db(connection, FETCH_ALL_RECORD_COMMAND % table_name)
        connection.close()
    except Exception as exception:
        print('Encounter some exceptions while executing DB tasks, close the connection...')
        print('Exception message: ' + exception.__str__())
        connection.close()

if __name__ == '__main__':
    main()

輸出結果:

Table 'record' already exists, initializing...
Table 'record' created.
(1, '嚕嚕抱枕', 999, '嚕嚕小朋友')
(2, '妖怪手錶【吉胖喵】頭型抱枕', 590, 'GOHAPPY')
(3, '妖怪手錶【吉胖喵】頭型抱枕', 590, 'GOHAPPY')
(4, '《童趣植栽》【Light+Bio】超萌妖怪苔球-吉胖喵', 448, 'myfone購物')
(5, '妖怪手錶手機擦吊飾。吉胖喵', 199, 'GOHAPPY')
(6, '【 日本 BANDAI 】妖怪手錶 絨毛娃娃 吉胖喵 MM651651', 638, '百利市購物中心')
(7, '妖怪手錶 - 吉胖喵氣球', 999, 'Yahoo奇摩購物中心')
(8, '【BANDAI】妖怪轉蛋(吉胖喵篇)', 375, 'momo富邦購物館')
(9, '妖怪手錶 QQ吉胖喵吊飾', 148, '博客來')
(10, '下殺-BANDAI 妖怪轉蛋 吉胖喵篇', 299, 'ibon mart')
(11, '妖怪手錶 吉胖喵氣球', 1295, '博客來')
(12, '妖怪手錶手機束口袋。吉胖喵', 235, 'GOHAPPY')
(13, '妖怪手錶手機擦吊飾。吉胖喵', 199, 'GOHAPPY')
(14, '妖怪手錶【吉胖喵】頭型暖手抱枕', 650, 'GOHAPPY')
(15, '妖怪手錶毛絨護腕。吉胖喵', 285, 'GOHAPPY')
(16, '妖怪手錶金屬吊飾。吉胖喵', 199, 'GOHAPPY')
(17, '【妖怪手錶】吉胖喵疊疊樂', 1100, '全家行動購')
(18, '妖怪手錶手機束口袋。吉胖喵', 235, 'GOHAPPY')
(19, '妖怪手錶手機擦吊飾。吉胖喵', 199, 'GOHAPPY')
(20, '妖怪手錶金牌吊飾。吉胖喵', 199, 'GOHAPPY')
(21, '妖怪手錶金屬吊飾。吉胖喵', 199, 'GOHAPPY')
(22, '【Nano Block迷你積木 】NBH-094妖怪手錶吉胖喵', 479, 'friDay購物')
(23, '【妖怪手錶】手機擦吊飾(吉胖喵)', 199, 'momo富邦購物館')
(24, '【迎光】超萌妖怪苔球-吉胖喵', 448, '博客來')
(25, 'ENSKY 3D 立體拼圖 KM-72 吉胖喵 透明版 日版', 370, 'Yahoo奇摩購物中心')
(26, '妖怪手錶毛絨護腕。吉胖喵', 285, 'GOHAPPY')
(27, '妖怪手錶手機擦吊飾。吉胖喵', 199, 'GOHAPPY')
(28, '妖怪手錶金牌吊飾。吉胖喵', 199, 'GOHAPPY')
(29, '【BabyTiger虎兒寶】卡通造型幼教兒童睡袋- 妖怪手錶-吉胖喵', 1088, 'GOHAPPY')
(30, '妖怪手錶趴姿毛絨公仔。吉胖喵', 720, 'GOHAPPY')
(31, '妖怪手錶警報器吊飾。吉胖喵', 565, 'GOHAPPY')
(32, '妖怪手錶毛絨小公仔。吉胖喵', 540, 'GOHAPPY')
(33, '【波克貓哈日網】妖怪手錶系列◇造型相框◇《吉胖喵》', 465, 'GOHAPPY')
(34, '妖怪手錶毛絨公仔筆袋。吉胖喵', 450, 'GOHAPPY')
(35, '妖怪手錶毛萬用小包。吉胖喵', 350, 'GOHAPPY')
(36, '妖怪手錶金屬人形吊飾。吉胖喵', 199, 'GOHAPPY')
(37, '【BabyTiger虎兒寶】卡通造型幼教兒童睡袋- 妖怪手錶-吉胖喵', 1088, 'GOHAPPY')
(38, '妖怪手錶兒童連身雨衣。吉胖喵', 850, 'GOHAPPY')
(39, '【妖怪手錶】錶數位收納包(吉胖喵)', 299, 'momo富邦購物館')
(40, '妖怪手錶 - 吉胖喵3D立體小拼圖', 249, 'Yahoo奇摩購物中心')
(41, '妖怪手錶 妖怪徽章變形 吉胖喵', 108, '博客來')
(42, '童鞋城堡-妖怪手錶 中大童 吉胖喵造型運動鞋YW6501-藍', 890, 'ibon mart')
(43, '妖怪手錶毛絨小公仔。吉胖喵', 540, 'GOHAPPY')
(44, '童鞋城堡-妖怪手錶 中大童 吉胖喵造型運動鞋YW6501-藍', 890, 'GOHAPPY')
(45, '妖怪手錶兒童連身雨衣。吉胖喵', 850, 'GOHAPPY')
(46, '妖怪手錶趴姿毛絨公仔。吉胖喵', 720, 'GOHAPPY')
(47, '妖怪手錶萬用收納包。吉胖喵', 530, 'GOHAPPY')
(48, '妖怪手錶萬用收納包。吉胖喵', 530, 'GOHAPPY')
(49, '妖怪手錶公仔吊飾零錢車票夾。吉胖喵', 450, 'GOHAPPY')
(50, '妖怪手錶毛萬用小包。黑邊 吉胖喵', 350, 'GOHAPPY')
(51, '妖怪手錶毛絨鑰匙套吊飾。吉胖喵', 230, 'GOHAPPY')
(52, '妖怪手錶鑰匙套吊飾。吉胖喵', 199, 'GOHAPPY')
(53, '妖怪手錶療癒球形吊飾。吉胖喵', 199, 'GOHAPPY')
(54, '童鞋城堡-妖怪手錶 中童 吉胖喵運動鞋YW8844-藍', 690, 'GOHAPPY')
(55, '妖怪手錶警報器吊飾。吉胖喵', 565, 'GOHAPPY')
(56, '【妖怪手錶】吉胖喵與小石獅100%純棉條紋浴巾', 502, 'momo富邦購物館')
(57, '童鞋城堡-妖怪手錶 中童 吉胖喵夾腳拖鞋YW2116-黃', 290, 'ibon mart')
(58, '妖怪手錶兒童連身雨衣。吉胖喵', 850, '博客來')
(59, '妖怪手錶 吉胖喵與小石獅100%純棉條紋浴巾', 590, 'Yahoo奇摩購物中心')
(60, '妖怪手錶毛絨公仔筆袋。吉胖喵', 450, 'GOHAPPY')
(61, '妖怪手錶毛萬用小包。黑邊 吉胖喵', 350, 'GOHAPPY')
(62, '妖怪手錶毛絨鑰匙套吊飾。吉胖喵', 230, 'GOHAPPY')
(63, '妖怪手錶鑰匙套吊飾。吉胖喵', 199, 'GOHAPPY')
(64, '妖怪手錶金屬人形吊飾。吉胖喵', 199, 'GOHAPPY')
(65, '童鞋城堡-妖怪手錶 中大童 吉胖喵電燈運動鞋YW6508-藍', 890, 'GOHAPPY')
(66, '妖怪手錶公仔吊飾零錢車票夾。吉胖喵', 450, 'GOHAPPY')
(67, '妖怪手錶毛萬用小包。吉胖喵', 350, 'GOHAPPY')
(68, '妖怪手錶數位收納包。吉胖喵', 299, 'GOHAPPY')
(69, '妖怪手錶金屬人形吊飾。吉胖喵', 199, 'GOHAPPY')
(70, '妖怪手錶療癒球形吊飾。吉胖喵', 199, 'GOHAPPY')
(71, 'BABYTIGER 【虎兒寶】卡通造型幼教兒童睡袋- 妖怪手錶-吉胖喵', 829, 'watsons屈臣氏')
(72, '妖怪手錶數位收納包。吉胖喵', 299, 'GOHAPPY')
(73, '妖怪手錶金屬人形吊飾。吉胖喵', 199, 'GOHAPPY')
(74, '【妖怪手錶】吉胖喵武士慶典單人床包二件組 3.5x6.2尺(105x186公分)', 549, 'GOHAPPY')
(75, '【日本進口】妖怪手錶 吉胖喵 票夾零錢/收納包-黃色', 399, '博客來')
(76, '童鞋城堡-妖怪手錶 中童 吉胖喵夾腳拖鞋YW2149-藍', 390, 'ibon mart')
(77, '妖怪手錶兒童連身雨衣。吉胖喵', 850, 'Yahoo奇摩購物中心')
(78, '【妖怪手錶】可愛圖騰兒童睡袋(吉胖喵喵版--藍)', 850, 'momo富邦購物館')

Process finished with exit code 0

DB內容:

原始碼點我

Last updated