> For the complete documentation index, see [llms.txt](https://clu.gitbook.io/python-web-crawler-note/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://clu.gitbook.io/python-web-crawler-note/32-yahooqi-mo-dian-ying-ben-zhou-xin-pian.md).

# 3.2 Yahoo奇摩電影本週新片

Yahoo電影也算滿好爬的, 這邊要做的事情是把本週新片及其相關資訊都列出來, 並且寫入json檔案裡:

```python
import requests
import re
import json
from bs4 import BeautifulSoup


# Listing page of this week's new releases; this is the page main() scrapes.
Y_MOVIE_URL = 'https://tw.movies.yahoo.com/movie_thisweek.html'

# Base URLs of the per-movie pages. get_complete_intro() appends
# '/id=<movie_id>' to the info URL; the photo and time URLs are not used by
# the code in this listing.
# The info URL uses the '.html' suffix, matching the real link quoted in
# get_movie_id() ("movieinfo_main.html/id=6707") and the sibling constants.
Y_MOVIE_INFO_URL = 'https://tw.movies.yahoo.com/movieinfo_main.html'
Y_MOVIE_PHOTO_URL = 'https://tw.movies.yahoo.com/movieinfo_photos.html'
Y_MOVIE_TIME_URL = 'https://tw.movies.yahoo.com/movietime_result.html'


def get_web_page(url, timeout=10):
    """Fetch *url* with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on an unresponsive host.

    Returns:
        The response text when the status code is 200; None when the request
        fails or the server answers with any other status.
    """
    try:
        resp = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Network-level failures follow the same "None on failure" contract
        # as a non-200 status, so callers only need to check for None.
        print('Request failed: ', url, err)
        return None
    if resp.status_code != 200:
        print('Invalid url: ', resp.url)
        return None
    return resp.text


def get_movies(dom):
    """Parse the weekly-releases page markup into a list of movie dicts.

    Args:
        dom: HTML text of the listing page.

    Returns:
        A list with one dict per ``div.clearfix.row`` element, holding the
        keys expectation, ch_name, en_name, movie_id, poster_url,
        release_date, intro and trailer_url (empty string when the row has no
        trailer link).
    """
    soup = BeautifulSoup(dom, 'html5lib')
    results = []
    for entry in soup.find_all('div', 'clearfix row'):
        # Most fields live inside the same "text" div, so look it up once.
        info = entry.find('div', 'text')
        record = {
            'expectation': entry.find(id='ymvle').find('div', 'bd clearfix ').em.text,
            'ch_name': info.h4.text,
            'en_name': info.h5.text,
            'movie_id': get_movie_id(info.h4.a['href']),
            # Swap the 'mpost4' path segment for 'mpost' in the poster URL.
            'poster_url': entry.find('div', 'img').img['src'].replace('mpost4', 'mpost'),
            'release_date': get_date(info.span.text),
            # Drop the "read more" suffix and newlines from the teaser text.
            'intro': info.p.text.replace(u'...詳全文', '').replace('\n', ''),
        }
        trailer_item = info.find('li', 'trailer')
        if trailer_item:
            record['trailer_url'] = get_trailer_url(trailer_item.a['href'])
        else:
            record['trailer_url'] = ''
        results.append(record)
    return results


def get_date(date_str):
    """Return the first ``digits-digits-digits`` run found in *date_str*.

    e.g. "上映日期：2017-03-23" -> "2017-03-23"

    Args:
        date_str: Text that is expected to contain a release date.

    Returns:
        The matched date substring, or *date_str* unchanged when no such
        pattern is present.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    # re.search API DOC: https://docs.python.org/3/library/re.html#search-vs-match
    match = re.search(r'\d+-\d+-\d+', date_str)
    return match.group(0) if match else date_str


def get_movie_id(url):
    """Extract the numeric movie id from a movie-info link.

    e.g. "https://tw.rd.yahoo.com/referurl/movie/thisweek/info/*https://tw.movies.yahoo.com/movieinfo_main.html/id=6707"
         -> "6707"

    Args:
        url: Link that is expected to contain a "/id=<digits>" segment.

    Returns:
        The digits following the first "/id=" as a string, or *url* unchanged
        when no such segment is present.
    """
    # Raw string avoids the invalid "\d" escape; the capture group yields the
    # digits directly instead of stripping the "/id=" prefix afterwards.
    match = re.search(r'/id=(\d+)', url)
    return match.group(1) if match else url


def get_trailer_url(url):
    """Return the target address embedded after '*' in a redirect link.

    e.g., 'https://tw.rd.yahoo.com/referurl/movie/thisweek/trailer/*https://tw.movies.yahoo.com/video/美女與野獸-最終版預告-024340912.html'
          -> 'https://tw.movies.yahoo.com/video/美女與野獸-最終版預告-024340912.html'

    Args:
        url: Link of the form "<redirect prefix>*<target url>".

    Returns:
        Everything after the first '*'. When *url* contains no '*', it is
        returned unchanged (the same fallback get_date/get_movie_id use)
        instead of raising IndexError.
    """
    # partition() splits on the first '*' only, so a target that itself
    # contains '*' is not truncated.
    _, separator, target = url.partition('*')
    return target if separator else url


def get_complete_intro(movie_id):
    """Fetch a movie's info page and print its introduction text.

    Args:
        movie_id: Movie id as a string; it is concatenated onto
            Y_MOVIE_INFO_URL as "/id=<movie_id>".

    Returns:
        Always None. The text of the "text show" and "text full" divs, when
        present, is printed rather than returned.
    """
    page = get_web_page(Y_MOVIE_INFO_URL + '/id=' + movie_id)
    if not page:
        return None
    # Build the parse tree with the BeautifulSoup constructor.
    # BeautifulSoup.get is the tag attribute accessor, not a parser entry
    # point, and fails when called with the page text.
    soup = BeautifulSoup(page, 'html5lib')
    for css_class in ('text show', 'text full'):
        section = soup.find('div', css_class)
        if section:
            print(section.text)
    return None


def main():
    """Scrape this week's releases, print each one, and save them to movie.json."""
    html = get_web_page(Y_MOVIE_URL)
    if not html:
        # Nothing was fetched, so there is nothing to parse or write.
        return
    movie_list = get_movies(html)
    for entry in movie_list:
        print(entry)
    # ensure_ascii=False keeps the Chinese text readable in the output file.
    with open('movie.json', 'w', encoding='UTF-8') as out_file:
        json.dump(movie_list, out_file, indent=2, sort_keys=True, ensure_ascii=False)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
```

輸出如下:

```
{'expectation': '98', 'ch_name': '加勒比海盜 神鬼奇航：死無對證', 'en_name': 'Pirates of the Caribbean: Dead Men Tell No Tales', 'movie_id': '6534', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/65/34/6534.jpg', 'release_date': '2017-05-24', 'intro': '位處墨西哥灣與加勒比海的神秘百慕達三角洲，發生過大量令人匪疑所思的失蹤事件，不僅讓這個地區蒙上一層神祕的色彩，更被烙上「魔鬼三角」', 'trailer_url': 'https://tw.movies.yahoo.com/video/加勒比海盜-神鬼奇航-死無對證-強尼戴普現身迪士尼樂園-013706487.html'}
{'expectation': '94', 'ch_name': '海灘救護隊', 'en_name': 'Baywatch', 'movie_id': '6682', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/66/82/6682.jpg', 'release_date': '2017-05-25', 'intro': '改編自1989 年同名電視劇。兩位不稱頭的肌肉猛男救生員準備到美國加州海灘應徵救護隊成員。', 'trailer_url': 'https://tw.movies.yahoo.com/video/海灘救護隊-海灘篇-031153665.html'}
{'expectation': '71', 'ch_name': '大釣哥', 'en_name': 'Hanky Panky', 'movie_id': '6657', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/66/57/6657.jpg', 'release_date': '2017-05-26', 'intro': '★萬眾矚目！坐擁10億票房以上紀錄不敗天王豬哥亮2017賀歲強檔！ ★卡司陣容耳目一新，打造台灣賀歲全新類型！ ★影帝影后聯袂登場', 'trailer_url': 'https://tw.movies.yahoo.com/video/大釣哥-刪減感人片段篇-092443147.html'}
{'expectation': '95', 'ch_name': '墨利斯的情人經典數位修復', 'en_name': 'Maurice', 'movie_id': '6816', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/16/6816.jpg', 'release_date': '2017-05-26', 'intro': '★ 2017柏林影展 經典單元 ★ 威尼斯影展銀獅獎、最佳男演員、Golden Osella最佳電影音樂 ★ 奧斯卡最佳服裝設計入圍 兩', 'trailer_url': 'https://tw.movies.yahoo.com/video/墨利斯的情人經典數位修復-中文預告-101351858.html'}
{'expectation': '96', 'ch_name': '愛在回家時', 'en_name': 'A Family Man', 'movie_id': '6827', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/27/6827.jpg', 'release_date': '2017-05-26', 'intro': '★ 傑瑞德巴特勒從影至今最動人演出，情感層次豐富更勝《P.S. 我愛妳》 ★ 《大法官》《會計師》編劇、《藥命俱樂部》製片聯手出擊，從', 'trailer_url': 'https://tw.movies.yahoo.com/video/愛在回家時-中文預告-094916516.html'}
{'expectation': '97', 'ch_name': '玩命鎗火', 'en_name': 'Free Fire', 'movie_id': '6865', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/65/6865.jpg', 'release_date': '2017-05-26', 'intro': '★ 黑色幽默 × 鎗林彈雨 × 極致暴力 × 爽度破表 × 笑到岔氣！ ★《華爾街之狼》《隔離島》金獎名導馬丁史柯西斯超殺監製！ ★ 暴力', 'trailer_url': 'https://tw.movies.yahoo.com/video/玩命鎗火-6秒看完-玩命鎗火-021640376.html'}
{'expectation': '80', 'ch_name': '2017臺北文學．閱影展', 'en_name': '\u3000', 'movie_id': '6871', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/71/6871.jpg', 'release_date': '2017-05-26', 'intro': '【策展緣起】 每一次的「文學閱影展」都是一種召喚。召喚觀眾翻開書頁，召喚讀者走入戲院，召喚所有人在電影中看見文學之美。正如文學', 'trailer_url': ''}
{'expectation': '92', 'ch_name': '為妳唱的歌', 'en_name': 'Song to Song', 'movie_id': '6876', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/76/6876.jpg', 'release_date': '2017-05-26', 'intro': '兩對戀人，一場音樂季，浪漫四角戀激情上演…。 菲(魯妮瑪拉飾)愛玩音樂，夢想成為創作歌手，談戀愛不受羈絆，她遇見了音樂人”BV', 'trailer_url': 'https://tw.movies.yahoo.com/video/為妳唱的歌-雷恩深情獻唱20秒預告-095213971.html'}
{'expectation': '40', 'ch_name': '我就是要結婚！', 'en_name': 'Through the Wall', 'movie_id': '6889', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/89/6889.jpg', 'release_date': '2017-05-26', 'intro': '只想被愛的蜜涵，能否找到完美另一半？！ 誰會在這麼短的時間裡，成為她生命的另一半呢？年過三十的蜜涵沒有心儀對象，為了擁有一生一', 'trailer_url': 'https://tw.movies.yahoo.com/video/我就是要結婚-中文預告-103015177.html'}
{'expectation': '100', 'ch_name': '雙面法蘭茲', 'en_name': 'Frantz', 'movie_id': '6894', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/94/6894.jpg', 'release_date': '2017-05-26', 'intro': '兩國的無情戰場\u3000炸出三人的熾烈情場 次世界大戰德法交火，在殊死交鋒之際，一股超越敵對的溫熱情感，在壕溝中默默滋長。這一切', 'trailer_url': 'https://tw.movies.yahoo.com/video/雙面法蘭茲-中文預告-093941023.html'}
{'expectation': '33', 'ch_name': '回聲戀習曲', 'en_name': 'One Step', 'movie_id': '6903', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/69/03/6903.jpg', 'release_date': '2017-05-26', 'intro': '★韓國超人氣女子團體2NE1前成員Dara，首次大銀幕一展歌喉，詮釋試圖找回過去記憶的魅力女主角，讓影迷心疼又尖叫！ ★南韓名導金基德盛', 'trailer_url': 'https://tw.movies.yahoo.com/video/回聲戀習曲-中文預告-150821910.html'}
{'expectation': '100', 'ch_name': '藍心狂想曲', 'en_name': 'The Blue Hearts', 'movie_id': '6904', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/69/04/6904.jpg', 'release_date': '2017-05-26', 'intro': '★2016高雄電影節 口碑爆棚 ★2016日本夕張國際奇幻影展 ★2016夏威夷國際電影節 ★日本傳奇龐克天團「藍心樂團」三十週年出道', 'trailer_url': 'https://tw.movies.yahoo.com/video/藍心狂想曲-中文預告-150821276.html'}
{'expectation': '75', 'ch_name': '吃吃的愛', 'en_name': "Didi's Dream", 'movie_id': '6870', 'poster_url': 'https://s.yimg.com/vu/movies/fp/mpost/68/70/6870.jpg', 'release_date': '2017-05-27', 'intro': '想要在巨星姐姐面前證明自己的臨時演員上官娣娣，和多年期待真愛卻在最後被狠狠出賣的太空黑鳥麵館老板娘許春梅，當兩個人的世界以想象不到', 'trailer_url': 'https://tw.movies.yahoo.com/video/吃吃的愛-洗腦神曲鯉魚歌-012223571.html'}

Process finished with exit code 0
```

產生的檔案:

```json
[
  {
    "ch_name": "加勒比海盜 神鬼奇航：死無對證",
    "en_name": "Pirates of the Caribbean: Dead Men Tell No Tales",
    "expectation": "98",
    "intro": "位處墨西哥灣與加勒比海的神秘百慕達三角洲，發生過大量令人匪疑所思的失蹤事件，不僅讓這個地區蒙上一層神祕的色彩，更被烙上「魔鬼三角」",
    "movie_id": "6534",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/65/34/6534.jpg",
    "release_date": "2017-05-24",
    "trailer_url": "https://tw.movies.yahoo.com/video/加勒比海盜-神鬼奇航-死無對證-強尼戴普現身迪士尼樂園-013706487.html"
  },
  {
    "ch_name": "海灘救護隊",
    "en_name": "Baywatch",
    "expectation": "94",
    "intro": "改編自1989 年同名電視劇。兩位不稱頭的肌肉猛男救生員準備到美國加州海灘應徵救護隊成員。",
    "movie_id": "6682",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/66/82/6682.jpg",
    "release_date": "2017-05-25",
    "trailer_url": "https://tw.movies.yahoo.com/video/海灘救護隊-海灘篇-031153665.html"
  },
  {
    "ch_name": "大釣哥",
    "en_name": "Hanky Panky",
    "expectation": "71",
    "intro": "★萬眾矚目！坐擁10億票房以上紀錄不敗天王豬哥亮2017賀歲強檔！ ★卡司陣容耳目一新，打造台灣賀歲全新類型！ ★影帝影后聯袂登場",
    "movie_id": "6657",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/66/57/6657.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/大釣哥-刪減感人片段篇-092443147.html"
  },
  {
    "ch_name": "墨利斯的情人經典數位修復",
    "en_name": "Maurice",
    "expectation": "95",
    "intro": "★ 2017柏林影展 經典單元 ★ 威尼斯影展銀獅獎、最佳男演員、Golden Osella最佳電影音樂 ★ 奧斯卡最佳服裝設計入圍 兩",
    "movie_id": "6816",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/16/6816.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/墨利斯的情人經典數位修復-中文預告-101351858.html"
  },
  {
    "ch_name": "愛在回家時",
    "en_name": "A Family Man",
    "expectation": "96",
    "intro": "★ 傑瑞德巴特勒從影至今最動人演出，情感層次豐富更勝《P.S. 我愛妳》 ★ 《大法官》《會計師》編劇、《藥命俱樂部》製片聯手出擊，從",
    "movie_id": "6827",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/27/6827.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/愛在回家時-中文預告-094916516.html"
  },
  {
    "ch_name": "玩命鎗火",
    "en_name": "Free Fire",
    "expectation": "97",
    "intro": "★ 黑色幽默 × 鎗林彈雨 × 極致暴力 × 爽度破表 × 笑到岔氣！ ★《華爾街之狼》《隔離島》金獎名導馬丁史柯西斯超殺監製！ ★ 暴力",
    "movie_id": "6865",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/65/6865.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/玩命鎗火-6秒看完-玩命鎗火-021640376.html"
  },
  {
    "ch_name": "2017臺北文學．閱影展",
    "en_name": "　",
    "expectation": "80",
    "intro": "【策展緣起】 每一次的「文學閱影展」都是一種召喚。召喚觀眾翻開書頁，召喚讀者走入戲院，召喚所有人在電影中看見文學之美。正如文學",
    "movie_id": "6871",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/71/6871.jpg",
    "release_date": "2017-05-26",
    "trailer_url": ""
  },
  {
    "ch_name": "為妳唱的歌",
    "en_name": "Song to Song",
    "expectation": "92",
    "intro": "兩對戀人，一場音樂季，浪漫四角戀激情上演…。 菲(魯妮瑪拉飾)愛玩音樂，夢想成為創作歌手，談戀愛不受羈絆，她遇見了音樂人”BV",
    "movie_id": "6876",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/76/6876.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/為妳唱的歌-雷恩深情獻唱20秒預告-095213971.html"
  },
  {
    "ch_name": "我就是要結婚！",
    "en_name": "Through the Wall",
    "expectation": "40",
    "intro": "只想被愛的蜜涵，能否找到完美另一半？！ 誰會在這麼短的時間裡，成為她生命的另一半呢？年過三十的蜜涵沒有心儀對象，為了擁有一生一",
    "movie_id": "6889",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/89/6889.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/我就是要結婚-中文預告-103015177.html"
  },
  {
    "ch_name": "雙面法蘭茲",
    "en_name": "Frantz",
    "expectation": "100",
    "intro": "兩國的無情戰場　炸出三人的熾烈情場 次世界大戰德法交火，在殊死交鋒之際，一股超越敵對的溫熱情感，在壕溝中默默滋長。這一切",
    "movie_id": "6894",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/94/6894.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/雙面法蘭茲-中文預告-093941023.html"
  },
  {
    "ch_name": "回聲戀習曲",
    "en_name": "One Step",
    "expectation": "33",
    "intro": "★韓國超人氣女子團體2NE1前成員Dara，首次大銀幕一展歌喉，詮釋試圖找回過去記憶的魅力女主角，讓影迷心疼又尖叫！ ★南韓名導金基德盛",
    "movie_id": "6903",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/69/03/6903.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/回聲戀習曲-中文預告-150821910.html"
  },
  {
    "ch_name": "藍心狂想曲",
    "en_name": "The Blue Hearts",
    "expectation": "100",
    "intro": "★2016高雄電影節 口碑爆棚 ★2016日本夕張國際奇幻影展 ★2016夏威夷國際電影節 ★日本傳奇龐克天團「藍心樂團」三十週年出道",
    "movie_id": "6904",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/69/04/6904.jpg",
    "release_date": "2017-05-26",
    "trailer_url": "https://tw.movies.yahoo.com/video/藍心狂想曲-中文預告-150821276.html"
  },
  {
    "ch_name": "吃吃的愛",
    "en_name": "Didi's Dream",
    "expectation": "75",
    "intro": "想要在巨星姐姐面前證明自己的臨時演員上官娣娣，和多年期待真愛卻在最後被狠狠出賣的太空黑鳥麵館老板娘許春梅，當兩個人的世界以想象不到",
    "movie_id": "6870",
    "poster_url": "https://s.yimg.com/vu/movies/fp/mpost/68/70/6870.jpg",
    "release_date": "2017-05-27",
    "trailer_url": "https://tw.movies.yahoo.com/video/吃吃的愛-洗腦神曲鯉魚歌-012223571.html"
  }
]
```

原始碼[點我](https://github.com/yotsuba1022/web-crawler-practice/blob/master/ch3/yahoo_movie.py)


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://clu.gitbook.io/python-web-crawler-note/32-yahooqi-mo-dian-ying-ben-zhou-xin-pian.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
