今天摸了一天鱼,顺便耍了一会儿Python,觉得自己永远都买不起南京的房了。
1import requests
2import matplotlib.pyplot as plt
3from bs4 import BeautifulSoup
4import pandas as pd
5import seaborn as sns
# Listing URL prefix on lianjia.com; the page number is appended to it.
raw_url = 'https://nj.fang.lianjia.com/loupan/nhs1pg'
# Browser-like user agent sent with every request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
}
# One dict per successfully parsed listing.
li = []
# The listing has 45 pages in total. range() excludes its stop value, so the
# stop must be 46 for page 45 to be fetched too.
for index in range(1, 46):
    url = raw_url + str(index)
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    result = soup.find_all('li', "resblock-list post_ulog_exposure_scroll has-results")
    for item in result:
        try:
            price_box = item.find('div', 'main-price')
            one = {
                # Development name
                'title': str(item.find('a').get('title')),
                # District
                'location': str(item.find('div', "resblock-location").span.string),
                # Floor area
                'area': str(item.find('div', "resblock-area").span.string),
                # Price
                'price': int(price_box.find('span', 'number').string),
                # Price unit
                'desc': str(price_box.find('span', 'desc').string)
            }
        except (AttributeError, TypeError, ValueError):
            # Best-effort scrape: skip a listing when an expected tag is
            # missing (AttributeError on None) or its price is not an
            # integer (TypeError / ValueError from int()).
            continue
        li.append(one)
# Build a DataFrame with one row per scraped listing.
df = pd.DataFrame(li)
# Keep only listings priced per square metre, dropping those priced per unit.
filtered = df[df['desc'] == '\xa0元/平(均价)']
# Average price per district. Only the numeric 'price' column is averaged:
# calling mean() on the whole group would also try the string columns
# (title, area, desc), which raises TypeError on pandas >= 2.0.
price = filtered.groupby('location')[['price']].mean()
# Sort districts from most to least expensive.
price = price.sort_values('price', ascending=False)
# Draw a bar chart of the average price per district.
f, ax = plt.subplots(figsize=(16, 10))
# SimHei is set as the seaborn font, presumably so the Chinese district names
# on the x axis render.
sns.set(font='SimHei')
sns.barplot(x=price.index, y=price.price, data=price, ax=ax)
# The title size was 2000 pt, far too large to read on a 16x10 inch figure;
# 20 pt is used instead.
ax.set_title('Average house price in different districts of Nanjing', fontsize=20, fontproperties='DejaVu Sans')
plt.show()
结果如下:
Comments | NOTHING