8.1 空氣品質監測網
這個網站可以讓我們根據選擇的地區查看空氣品質數據。不過基於安全上的考量，它對 POST 請求有一些特殊的限制：除了跟查詢有關的表單資料外，還必須另外送出 `__VIEWSTATE`、`__EVENTVALIDATION` 以及 `__VIEWSTATEGENERATOR` 這三個值。這些額外的值都可以從對空氣品質監測網發出 GET 請求後所得到的 response 中取得。
import requests
from bs4 import BeautifulSoup
EPA_TAQM_URL = 'http://taqm.epa.gov.tw/taqm/tw/HourlyData.aspx'
def generate_query_form_data(start_date, end_date, site='56', param='4',
                             control_prefix='ctl09', timeout=10):
    """Build the POST form data for a query against the EPA TAQM page.

    The page is first fetched with GET so that the hidden fields
    ``__VIEWSTATE``, ``__EVENTVALIDATION`` and ``__VIEWSTATEGENERATOR`` can be
    read from the response; these values are sent back together with the
    query fields.

    Args:
        start_date: Start date sent as the ``txtDateS`` field
            (the caller in this file uses the form ``'2017/05/20'``).
        end_date: End date sent as the ``txtDateE`` field.
        site: Value sent as the ``lbSite`` field (presumably a monitoring
            site code -- confirm against the page's site list).
        param: Value sent as the ``lbParam`` field (presumably the measured
            item code -- confirm against the page's parameter list).
        control_prefix: The ``ctlxx`` prefix of the query control names.
        timeout: Seconds to wait for the GET request before giving up.

    Returns:
        A dict of form fields ready to be POSTed, or ``None`` when any of the
        hidden fields cannot be found in the fetched page.
    """
    resp = requests.get(EPA_TAQM_URL, timeout=timeout)
    dom = BeautifulSoup(resp.text, 'html5lib')

    form_data = {}
    hidden_field_ids = ('__VIEWSTATE', '__EVENTVALIDATION',
                        '__VIEWSTATEGENERATOR')
    for field_id in hidden_field_ids:
        field = dom.find(id=field_id)
        value = field.get('value') if field is not None else None
        if value is None:
            # Without every hidden field no usable form can be built (e.g. an
            # error page or a changed layout); signal that instead of
            # crashing while subscripting None.
            return None
        form_data[field_id] = value

    # In all the ctlxx$[var_name] names, the xx changes dynamically, so check
    # the current value on the page before crawling the web site and pass it
    # in as control_prefix.
    # TODO: Refactor it to collect the xx value dynamically.
    form_data[control_prefix + '$lbSite'] = site
    form_data[control_prefix + '$lbParam'] = param
    form_data[control_prefix + '$txtDateS'] = start_date
    form_data[control_prefix + '$txtDateE'] = end_date
    form_data[control_prefix + '$btnQuery'] = '查詢即時值'
    return form_data
def get_web_content(start_date, end_date, timeout=10):
    """POST a date-range query to the EPA TAQM page and parse the response.

    Args:
        start_date: Start date passed on to ``generate_query_form_data``.
        end_date: End date passed on to ``generate_query_form_data``.
        timeout: Seconds to wait for the POST request before giving up, so a
            stalled server cannot block the script forever.

    Returns:
        The response body parsed into a ``BeautifulSoup`` document, or
        ``None`` when ``generate_query_form_data`` yields no form data.
    """
    form_data = generate_query_form_data(start_date, end_date)
    if not form_data:
        return None

    resp = requests.post(EPA_TAQM_URL, data=form_data, timeout=timeout)
    return BeautifulSoup(resp.text, 'html5lib')
def main():
    """Query a fixed date range and print the text of each result table."""
    page = get_web_content('2017/05/20', '2017/05/22')
    if not page:
        return

    # Only tables selected by the 'TABLE_G' filter are printed; each one is
    # printed as a flat list of its whitespace-stripped strings.
    for result_table in page.find_all('table', 'TABLE_G'):
        print(list(result_table.stripped_strings))


if __name__ == '__main__':
    main()
輸出結果:
['日期', '00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '05/20', '34', '32', '38', '43', '40', '41', '43', '53', '55', '59', '65', '51', '36', '15', '25', '28', '40', '43', '36', '33', '27', '35', '42', '47', '05/21', '51', '54', '57', '59', '60', '64', '71', '66', '68', '59', '54', '35', '35', '39', '62', '59', '53', '37', '36', '35', '31', '34', '32', '36', '05/22', '30', '39', '37', '42', '39', '35', '36', '38', '45', '41', '35', '30', '43', '43', '45', '28', '22', '24', '27', '33', '30', '30', '21', '17']
Process finished with exit code 0
原始碼點我
Last updated