商城購物網(wǎng)站建設(shè)方案短視頻營銷策略
一、網(wǎng)址:
全國行政區(qū)劃信息查詢平臺(tái)
二、分析并搭建框架
檢查網(wǎng)頁源碼:
檢查網(wǎng)頁源碼可以發(fā)現(xiàn): 所有省級(jí)信息全部在javaScript下的json中,會(huì)在頁面加載時(shí)加載json數(shù)據(jù),填充到頁面的option中。
1、第一步:使用正則表達(dá)式抓取json數(shù)據(jù)并解析,組成一個(gè)province集合:
# 獲取省的集合def get_province(self):pattern = re.compile(r"var json =(.*?);", re.MULTILINE | re.DOTALL)script = self.soup.find("script", text=pattern)lists = str(pattern.search(script.text).group(1))json_list = json.loads(lists)# province_list = set()province_dict = dict()for json_data in json_list:province = json_data['shengji']quhua_code = json_data['quHuaDaiMa']province_dict.update({quhua_code: province})# province_list.add(province)# print(province_dict)return province_dict
2、第二步:檢查該網(wǎng)站實(shí)現(xiàn)級(jí)聯(lián)查詢的方式,找出查詢市區(qū)的方式
根據(jù)這段源碼可看出,在選擇 省級(jí)的后,網(wǎng)頁會(huì)調(diào)用selectJson接口進(jìn)行一個(gè)post請(qǐng)求,上圖可以看到請(qǐng)求的body和header等信息。
代碼:
# 獲取市def get_city(self, shengji):body = ("shengji=" + shengji).encode('UTF-8')# body = "shengji='江蘇省(蘇)'"..encode('UTF-8')headers = {'Content-Type': "application/x-www-form-urlencoded; charset=utf-8",'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, ""like Gecko) Chrome/77.0.3865.120 Safari/537.36"}response = requests.post('http://xzqh.mca.gov.cn/selectJson', data=body, headers=headers)content = response.contentjson_list = json.loads(content)# city_list = set()city_dict = dict()for json_data in json_list:citys = json_data['diji']# city_list.add(citys)quhua_code = json_data['quHuaDaiMa']city_dict.update({quhua_code: citys})return city_dict# return city_list# 獲取區(qū)def get_area(self, shengji, diji):body = ("shengji=" + shengji + "&diji=" + diji).encode('UTF-8')headers = {'Content-Type': "application/x-www-form-urlencoded; charset=utf-8",'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, ""like Gecko) Chrome/77.0.3865.120 Safari/537.36"}response = requests.post('http://xzqh.mca.gov.cn/selectJson', data=body, headers=headers)content = response.contentjson_list = json.loads(content)# area_list = set()area_dict = dict()for json_data in json_list:area = json_data['xianji']# area_list.add(area)area_code = json_data['quHuaDaiMa']area_dict.update({area_code: area})return area_dict
3、第三步:main函數(shù)(遍歷所有省市區(qū)+數(shù)據(jù)入庫)
數(shù)據(jù)庫表結(jié)構(gòu)如下:
三、全部代碼:
import requests
from bs4 import BeautifulSoup
import pymysql
import re
import jsonclass allAreaDataNew(object):base_url = 'http://xzqh.mca.gov.cn/map'headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}wb_data = requests.get(base_url, headers=headers)wb_data.encoding = 'GBK'soup = BeautifulSoup(wb_data.text, 'lxml')# print(soup)def __init__(self):#### 自己數(shù)據(jù)庫信息self.db = pymysql.connect("***", "***", "***", "***", charset="utf8mb4") # mysql數(shù)據(jù)庫self.main()self.db.close()# 入口def main(self):sql_list = set()province_dict = self.get_province()for province_code in province_dict:province = province_dict[province_code]city_dict = self.get_city(province)sql_province = "insert into area_config values (null,'" + province + "','PROVINCE'," + province_code + ",0)"sql_list.add(sql_province)print(province_code + "----------------------------------省------------------------------------------" + province + "\n")for city_code in city_dict:city = city_dict[city_code]area_dict = self.get_area(province, city)print(city_code + "*******************市****************" + city + "\n")# 處理 省直轄縣級(jí)行政單位if city == '省直轄縣級(jí)行政單位' or city == '自治區(qū)直轄縣級(jí)行政單位':sql_city = "insert into area_config values (null,'" + city + "','CITY'," + province_code + "," + province_code + ")"sql_list.add(sql_city)for area_code in area_dict:area = area_dict[area_code]print(area_code + "-區(qū)-" + area + "\n")sql_area = "insert into area_config values (null,'" + area + "','DISTRICT'," + area_code + "," + province_code + ")"sql_list.add(sql_area)else:sql_city = "insert into area_config values (null,'" + city + "','CITY'," + city_code + "," + province_code + ")"sql_list.add(sql_city)for area_code in area_dict:area = area_dict[area_code]print(area_code + "-區(qū)-" + area + "\n")sql_area = "insert into area_config values (null,'" + area + "','DISTRICT'," + area_code + "," + city_code + ")"sql_list.add(sql_area)print(str(sql_list))# 事務(wù)入庫empty_sql = "delete from area_config"self.connect_mysql(empty_sql, sql_list)# 獲取省def get_province(self):pattern = re.compile(r"var json =(.*?);", re.MULTILINE | re.DOTALL)script = self.soup.find("script", text=pattern)lists = str(pattern.search(script.text).group(1))json_list = json.loads(lists)# province_list = set()province_dict = dict()for json_data in json_list:province = json_data['shengji']quhua_code = json_data['quHuaDaiMa']province_dict.update({quhua_code: province})# province_list.add(province)# print(province_dict)return province_dict# 獲取市def get_city(self, shengji):body = ("shengji=" + shengji).encode('UTF-8')# body = "shengji='江蘇省(蘇)'"..encode('UTF-8')headers = {'Content-Type': "application/x-www-form-urlencoded; charset=utf-8",'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, ""like Gecko) Chrome/77.0.3865.120 Safari/537.36"}response = requests.post('http://xzqh.mca.gov.cn/selectJson', data=body, headers=headers)content = response.contentjson_list = json.loads(content)# city_list = set()city_dict = dict()for json_data in json_list:citys = json_data['diji']# city_list.add(citys)quhua_code = json_data['quHuaDaiMa']city_dict.update({quhua_code: citys})return city_dict# return city_list# 獲取區(qū)def get_area(self, shengji, diji):body = ("shengji=" + shengji + "&diji=" + diji).encode('UTF-8')headers = {'Content-Type': "application/x-www-form-urlencoded; charset=utf-8",'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, ""like Gecko) Chrome/77.0.3865.120 Safari/537.36"}response = requests.post('http://xzqh.mca.gov.cn/selectJson', data=body, headers=headers)content = response.contentjson_list = json.loads(content)# area_list = set()area_dict = dict()for json_data in json_list:area = json_data['xianji']# area_list.add(area)area_code = json_data['quHuaDaiMa']area_dict.update({area_code: area})return area_dict# return area_listdef connect_mysql(self, empty_sql, sql_list):cursor = self.db.cursor()try:cursor.execute(empty_sql)for sql in sql_list:cursor.execute(sql)print('=================================更新所有數(shù)據(jù)完成!=================================')except Exception as e:print('=================================更新失敗!=================================')print(e)self.db.rollback()finally:cursor.close()# 提交操作self.db.commit()if __name__ == '__main__':allAreaDataNew()
代碼執(zhí)行成功后就可以查到中國所有省市區(qū)啦!:
特殊情況:“省直轄縣級(jí)行政單位”和“自治區(qū)直轄縣級(jí)行政單位”
注意:部分省有特殊的“直轄縣級(jí)行政單位”或“自治區(qū)直轄縣級(jí)行政單位”