3.2 Yahoo奇摩電影本週新片

Yahoo 電影也算滿好爬的，這邊要做的事情是把本週新片及其相關資訊都列出來，並且寫入 JSON 檔案裡：

import requests
import re
import json
from bs4 import BeautifulSoup


# Listing page that main() fetches and get_movies() parses.
Y_MOVIE_URL = 'https://tw.movies.yahoo.com/movie_thisweek.html'

# Base of a single movie's info page; get_complete_intro() appends '/id=<movie_id>'.
# NOTE(review): this ends in '.htm', while the example URL in get_movie_id's
# comment uses 'movieinfo_main.html' -- confirm which spelling the site serves.
Y_MOVIE_INFO_URL = 'https://tw.movies.yahoo.com/movieinfo_main.htm'
# The two constants below are not referenced anywhere in the visible code;
# presumably the photo and showtime pages -- confirm before relying on them.
Y_MOVIE_PHOTO_URL = 'https://tw.movies.yahoo.com/movieinfo_photos.html'
Y_MOVIE_TIME_URL = 'https://tw.movies.yahoo.com/movietime_result.html'


def get_web_page(url, timeout=10):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text when the server answers with status 200,
        otherwise ``None`` (non-200 status, or the request itself failed).
    """
    try:
        # requests waits indefinitely on an unresponsive server unless a
        # timeout is given, so always pass one.
        resp = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "None on failure"
        # contract as a bad status code, so callers only need one check.
        print('Request failed: ', url, exc)
        return None
    if resp.status_code != 200:
        print('Invalid url: ', resp.url)
        return None
    return resp.text


def get_movies(dom):
    """Parse the listing page HTML into a list of per-movie dicts.

    Args:
        dom: HTML text of the listing page.

    Returns:
        A list with one dict per ``div`` of class ``clearfix row``. Each dict
        holds the keys ``expectation``, ``ch_name``, ``en_name``, ``movie_id``,
        ``poster_url``, ``release_date``, ``intro`` and ``trailer_url``
        (``trailer_url`` is an empty string when the row has no trailer item).
    """
    parsed = BeautifulSoup(dom, 'html5lib')
    collected = []
    for entry in parsed.find_all('div', 'clearfix row'):
        # The expectation lookup stays first so a malformed row fails at the
        # same point as before.
        expectation = entry.find(id='ymvle').find('div', 'bd clearfix ').em.text
        # Most fields live under the same 'text' div; look it up once.
        info = entry.find('div', 'text')
        trailer_item = info.find('li', 'trailer')
        collected.append({
            'expectation': expectation,
            'ch_name': info.h4.text,
            'en_name': info.h5.text,
            'movie_id': get_movie_id(info.h4.a['href']),
            # Rewrite the 'mpost4' path segment of the poster URL to 'mpost'.
            'poster_url': entry.find('div', 'img').img['src'].replace('mpost4', 'mpost'),
            'release_date': get_date(info.span.text),
            # Drop the literal read-more marker and any newlines from the blurb.
            'intro': info.p.text.replace(u'...詳全文', '').replace('\n', ''),
            'trailer_url': get_trailer_url(trailer_item.a['href']) if trailer_item else '',
        })
    return collected


def get_date(date_str):
    """Return the first ``digits-digits-digits`` run found in ``date_str``.

    Args:
        date_str: Text that may contain a date such as ``2017-03-23``.

    Returns:
        The matched substring, or ``date_str`` unchanged when nothing matches.
    """
    # e.g. "上映日期:2017-03-23" -> "2017-03-23"
    # Raw string: '\d' inside a normal literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    # re.search API DOC: https://docs.python.org/3/library/re.html#search-vs-match
    match = re.search(r'\d+-\d+-\d+', date_str)
    if match is None:
        return date_str
    return match.group(0)


def get_movie_id(url):
    """Extract the numeric id that follows ``/id=`` in ``url``.

    Args:
        url: Link text expected to contain a ``/id=<digits>`` segment.

    Returns:
        The digits of the first ``/id=<digits>`` occurrence as a string, or
        ``url`` unchanged when no such segment exists.
    """
    # e.g. "https://tw.rd.yahoo.com/referurl/movie/thisweek/info/*https://tw.movies.yahoo.com/movieinfo_main.html/id=6707"
    #      -> "6707"
    # Raw string avoids the invalid '\d' escape; the capture group returns the
    # digits directly instead of stripping the '/id=' prefix afterwards.
    match = re.search(r'/id=(\d+)', url)
    if match is None:
        return url
    return match.group(1)


def get_trailer_url(url):
    """Strip the redirect prefix from a trailer link.

    The link is expected to look like ``<redirect prefix>*<target url>``.

    Args:
        url: The ``href`` value of the trailer link.

    Returns:
        Everything after the first ``*``. When ``url`` contains no ``*`` it is
        returned unchanged, mirroring how get_date and get_movie_id hand back
        their input when nothing matches.
    """
    # e.g., 'https://tw.rd.yahoo.com/referurl/movie/thisweek/trailer/*https://tw.movies.yahoo.com/video/美女與野獸-最終版預告-024340912.html'
    # partition() never raises on a missing separator (split('*')[1] did) and
    # keeps any further '*' characters that belong to the target itself.
    _prefix, marker, target = url.partition('*')
    return target if marker else url


def get_complete_intro(movie_id):
    """Fetch a movie's info page and print its introduction text.

    Args:
        movie_id: Movie id as a string; appended to Y_MOVIE_INFO_URL as
            ``/id=<movie_id>``.

    Returns:
        Always ``None``. The text of the ``text show`` and ``text full`` divs
        is printed when those elements are present; nothing is printed when
        the page could not be fetched.
    """
    page = get_web_page(Y_MOVIE_INFO_URL + '/id=' + movie_id)
    if page:
        # Build the parse tree with the constructor; the previous
        # BeautifulSoup.get(...) call is an attribute lookup meant for tag
        # instances and fails when handed the page text.
        soup = BeautifulSoup(page, 'html5lib')
        div_text_show = soup.find('div', 'text show')
        if div_text_show:
            print(div_text_show.text)
        div_text_full = soup.find('div', 'text full')
        if div_text_full:
            print(div_text_full.text)
    return None


def main():
    """Scrape the listing page, print each movie and save them to movie.json.

    Does nothing when the listing page could not be fetched.
    """
    listing = get_web_page(Y_MOVIE_URL)
    if not listing:
        return
    movies = get_movies(listing)
    for entry in movies:
        print(entry)
    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 output file.
    with open('movie.json', 'w', encoding='UTF-8') as out:
        json.dump(movies, out, indent=2, sort_keys=True, ensure_ascii=False)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

輸出如下:

{'expectation': '98', 'ch_name': '加勒比海盜 神鬼奇航:死無對證', 'en_name': 'Pirates of the Caribbean: Dead Men Tell No Tales', 'movie_id': '6534', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/65/34/6534.jpg', 'release_date': '2017-05-24', 'intro': '位處墨西哥灣與加勒比海的神秘百慕達三角洲,發生過大量令人匪疑所思的失蹤事件,不僅讓這個地區蒙上一層神祕的色彩,更被烙上「魔鬼三角」', 'trailer_url': 'https://tw.movies.yahoo.com/video/加勒比海盜-神鬼奇航-死無對證-強尼戴普現身迪士尼樂園-013706487.html'}
{'expectation': '94', 'ch_name': '海灘救護隊', 'en_name': 'Baywatch', 'movie_id': '6682', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/66/82/6682.jpg', 'release_date': '2017-05-25', 'intro': '改編自1989 年同名電視劇。兩位不稱頭的肌肉猛男救生員準備到美國加州海灘應徵救護隊成員。', 'trailer_url': 'https://tw.movies.yahoo.com/video/海灘救護隊-海灘篇-031153665.html'}
{'expectation': '71', 'ch_name': '大釣哥', 'en_name': 'Hanky Panky', 'movie_id': '6657', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/66/57/6657.jpg', 'release_date': '2017-05-26', 'intro': '★萬眾矚目!坐擁10億票房以上紀錄不敗天王豬哥亮2017賀歲強檔! ★卡司陣容耳目一新,打造台灣賀歲全新類型! ★影帝影后聯袂登場', 'trailer_url': 'https://tw.movies.yahoo.com/video/大釣哥-刪減感人片段篇-092443147.html'}
{'expectation': '95', 'ch_name': '墨利斯的情人經典數位修復', 'en_name': 'Maurice', 'movie_id': '6816', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/16/6816.jpg', 'release_date': '2017-05-26', 'intro': '★ 2017柏林影展 經典單元 ★ 威尼斯影展銀獅獎、最佳男演員、Golden Osella最佳電影音樂 ★ 奧斯卡最佳服裝設計入圍 兩', 'trailer_url': 'https://tw.movies.yahoo.com/video/墨利斯的情人經典數位修復-中文預告-101351858.html'}
{'expectation': '96', 'ch_name': '愛在回家時', 'en_name': 'A Family Man', 'movie_id': '6827', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/27/6827.jpg', 'release_date': '2017-05-26', 'intro': '★ 傑瑞德巴特勒從影至今最動人演出,情感層次豐富更勝《P.S. 我愛妳》 ★ 《大法官》《會計師》編劇、《藥命俱樂部》製片聯手出擊,從', 'trailer_url': 'https://tw.movies.yahoo.com/video/愛在回家時-中文預告-094916516.html'}
{'expectation': '97', 'ch_name': '玩命鎗火', 'en_name': 'Free Fire', 'movie_id': '6865', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/65/6865.jpg', 'release_date': '2017-05-26', 'intro': '★ 黑色幽默 × 鎗林彈雨 × 極致暴力 × 爽度破表 × 笑到岔氣! ★《華爾街之狼》《隔離島》金獎名導馬丁史柯西斯超殺監製! ★ 暴力', 'trailer_url': 'https://tw.movies.yahoo.com/video/玩命鎗火-6秒看完-玩命鎗火-021640376.html'}
{'expectation': '80', 'ch_name': '2017臺北文學.閱影展', 'en_name': '\u3000', 'movie_id': '6871', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/71/6871.jpg', 'release_date': '2017-05-26', 'intro': '【策展緣起】 每一次的「文學閱影展」都是一種召喚。召喚觀眾翻開書頁,召喚讀者走入戲院,召喚所有人在電影中看見文學之美。正如文學', 'trailer_url': ''}
{'expectation': '92', 'ch_name': '為妳唱的歌', 'en_name': 'Song to Song', 'movie_id': '6876', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/76/6876.jpg', 'release_date': '2017-05-26', 'intro': '兩對戀人,一場音樂季,浪漫四角戀激情上演…。 菲(魯妮瑪拉飾)愛玩音樂,夢想成為創作歌手,談戀愛不受羈絆,她遇見了音樂人”BV', 'trailer_url': 'https://tw.movies.yahoo.com/video/為妳唱的歌-雷恩深情獻唱20秒預告-095213971.html'}
{'expectation': '40', 'ch_name': '我就是要結婚!', 'en_name': 'Through the Wall', 'movie_id': '6889', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/89/6889.jpg', 'release_date': '2017-05-26', 'intro': '只想被愛的蜜涵,能否找到完美另一半?! 誰會在這麼短的時間裡,成為她生命的另一半呢?年過三十的蜜涵沒有心儀對象,為了擁有一生一', 'trailer_url': 'https://tw.movies.yahoo.com/video/我就是要結婚-中文預告-103015177.html'}
{'expectation': '100', 'ch_name': '雙面法蘭茲', 'en_name': 'Frantz', 'movie_id': '6894', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/94/6894.jpg', 'release_date': '2017-05-26', 'intro': '兩國的無情戰場\u3000炸出三人的熾烈情場 次世界大戰德法交火,在殊死交鋒之際,一股超越敵對的溫熱情感,在壕溝中默默滋長。這一切', 'trailer_url': 'https://tw.movies.yahoo.com/video/雙面法蘭茲-中文預告-093941023.html'}
{'expectation': '33', 'ch_name': '回聲戀習曲', 'en_name': 'One Step', 'movie_id': '6903', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/69/03/6903.jpg', 'release_date': '2017-05-26', 'intro': '★韓國超人氣女子團體2NE1前成員Dara,首次大銀幕一展歌喉,詮釋試圖找回過去記憶的魅力女主角,讓影迷心疼又尖叫! ★南韓名導金基德盛', 'trailer_url': 'https://tw.movies.yahoo.com/video/回聲戀習曲-中文預告-150821910.html'}
{'expectation': '100', 'ch_name': '藍心狂想曲', 'en_name': 'The Blue Hearts', 'movie_id': '6904', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/69/04/6904.jpg', 'release_date': '2017-05-26', 'intro': '★2016高雄電影節 口碑爆棚 ★2016日本夕張國際奇幻影展 ★2016夏威夷國際電影節 ★日本傳奇龐克天團「藍心樂團」三十週年出道', 'trailer_url': 'https://tw.movies.yahoo.com/video/藍心狂想曲-中文預告-150821276.html'}
{'expectation': '75', 'ch_name': '吃吃的愛', 'en_name': "Didi's Dream", 'movie_id': '6870', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/70/6870.jpg', 'release_date': '2017-05-27', 'intro': '想要在巨星姐姐面前證明自己的臨時演員上官娣娣,和多年期待真愛卻在最後被狠狠出賣的太空黑鳥麵館老板娘許春梅,當兩個人的世界以想象不到', 'trailer_url': 'https://tw.movies.yahoo.com/video/吃吃的愛-洗腦神曲鯉魚歌-012223571.html'}

Process finished with exit code 0

產生的檔案:

[
  {
    "ch_name": "加勒比海盜 神鬼奇航:死無對證",
    "en_name": "Pirates of the Caribbean: Dead Men Tell No Tales",
    "expectation": "98",
    "intro": "位處墨西哥灣與加勒比海的神秘百慕達三角洲,發生過大量令人匪疑所思的失蹤事件,不僅讓這個地區蒙上一層神祕的色彩,更被烙上「魔鬼三角」",
    "movie_id": "6534",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/65/34/6534.jpg",
    "release_date": "2017-05-24",
    "trailer_url": "https://tw.movies.yahoo.com/video/加勒比海盜-神鬼奇航-死無對證-強尼戴普現身迪士尼樂園-013706487.html"
  },
  {
    "ch_name": "海灘救護隊",
    "en_name": "Baywatch",
    "expectation": "94",
    "intro": "改編自1989 年同名電視劇。兩位不稱頭的肌肉猛男救生員準備到美國加州海灘應徵救護隊成員。",
    "movie_id": "6682",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/66/82/6682.jpg",
    "release_date": "2017-05-25",
    "trailer_url": "https://tw.movies.yahoo.com/video/海灘救護隊-海灘篇-031153665.html"
  },
  {
    "ch_name": "大釣哥",
    "en_name": "Hanky Panky",
    "expectation": "71",
    "intro": "★萬眾矚目!坐擁10億票房以上紀錄不敗天王豬哥亮2017賀歲強檔! ★卡司陣容耳目一新,打造台灣賀歲全新類型! ★影帝影后聯袂登場",
    "movie_id": "6657",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/66/57/6657.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/大釣哥-刪減感人片段篇-092443147.html"
  },
  {
    "ch_name": "墨利斯的情人經典數位修復",
    "en_name": "Maurice",
    "expectation": "95",
    "intro": "★ 2017柏林影展 經典單元 ★ 威尼斯影展銀獅獎、最佳男演員、Golden Osella最佳電影音樂 ★ 奧斯卡最佳服裝設計入圍 兩",
    "movie_id": "6816",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/16/6816.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/墨利斯的情人經典數位修復-中文預告-101351858.html"
  },
  {
    "ch_name": "愛在回家時",
    "en_name": "A Family Man",
    "expectation": "96",
    "intro": "★ 傑瑞德巴特勒從影至今最動人演出,情感層次豐富更勝《P.S. 我愛妳》 ★ 《大法官》《會計師》編劇、《藥命俱樂部》製片聯手出擊,從",
    "movie_id": "6827",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/27/6827.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/愛在回家時-中文預告-094916516.html"
  },
  {
    "ch_name": "玩命鎗火",
    "en_name": "Free Fire",
    "expectation": "97",
    "intro": "★ 黑色幽默 × 鎗林彈雨 × 極致暴力 × 爽度破表 × 笑到岔氣! ★《華爾街之狼》《隔離島》金獎名導馬丁史柯西斯超殺監製! ★ 暴力",
    "movie_id": "6865",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/65/6865.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/玩命鎗火-6秒看完-玩命鎗火-021640376.html"
  },
  {
    "ch_name": "2017臺北文學.閱影展",
    "en_name": " ",
    "expectation": "80",
    "intro": "【策展緣起】 每一次的「文學閱影展」都是一種召喚。召喚觀眾翻開書頁,召喚讀者走入戲院,召喚所有人在電影中看見文學之美。正如文學",
    "movie_id": "6871",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/71/6871.jpg",
    "release_date": "2017-05-26",
    "trailer_url": ""
  },
  {
    "ch_name": "為妳唱的歌",
    "en_name": "Song to Song",
    "expectation": "92",
    "intro": "兩對戀人,一場音樂季,浪漫四角戀激情上演…。 菲(魯妮瑪拉飾)愛玩音樂,夢想成為創作歌手,談戀愛不受羈絆,她遇見了音樂人”BV",
    "movie_id": "6876",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/76/6876.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/為妳唱的歌-雷恩深情獻唱20秒預告-095213971.html"
  },
  {
    "ch_name": "我就是要結婚!",
    "en_name": "Through the Wall",
    "expectation": "40",
    "intro": "只想被愛的蜜涵,能否找到完美另一半?! 誰會在這麼短的時間裡,成為她生命的另一半呢?年過三十的蜜涵沒有心儀對象,為了擁有一生一",
    "movie_id": "6889",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/89/6889.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/我就是要結婚-中文預告-103015177.html"
  },
  {
    "ch_name": "雙面法蘭茲",
    "en_name": "Frantz",
    "expectation": "100",
    "intro": "兩國的無情戰場 炸出三人的熾烈情場 次世界大戰德法交火,在殊死交鋒之際,一股超越敵對的溫熱情感,在壕溝中默默滋長。這一切",
    "movie_id": "6894",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/94/6894.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/雙面法蘭茲-中文預告-093941023.html"
  },
  {
    "ch_name": "回聲戀習曲",
    "en_name": "One Step",
    "expectation": "33",
    "intro": "★韓國超人氣女子團體2NE1前成員Dara,首次大銀幕一展歌喉,詮釋試圖找回過去記憶的魅力女主角,讓影迷心疼又尖叫! ★南韓名導金基德盛",
    "movie_id": "6903",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/69/03/6903.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/回聲戀習曲-中文預告-150821910.html"
  },
  {
    "ch_name": "藍心狂想曲",
    "en_name": "The Blue Hearts",
    "expectation": "100",
    "intro": "★2016高雄電影節 口碑爆棚 ★2016日本夕張國際奇幻影展 ★2016夏威夷國際電影節 ★日本傳奇龐克天團「藍心樂團」三十週年出道",
    "movie_id": "6904",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/69/04/6904.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/藍心狂想曲-中文預告-150821276.html"
  },
  {
    "ch_name": "吃吃的愛",
    "en_name": "Didi's Dream",
    "expectation": "75",
    "intro": "想要在巨星姐姐面前證明自己的臨時演員上官娣娣,和多年期待真愛卻在最後被狠狠出賣的太空黑鳥麵館老板娘許春梅,當兩個人的世界以想象不到",
    "movie_id": "6870",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/70/6870.jpg",
    "release_date": "2017-05-27",
    "trailer_url": "https://tw.movies.yahoo.com/video/吃吃的愛-洗腦神曲鯉魚歌-012223571.html"
  }
]

原始碼點我

Last updated