3.
@姑娘等等丶,requests_html?与requests有什么不同吗?真诚请教
6.
@yes_h_ut,8行代码
#coding=utf-8
# 爬取 丁香园 - 丁香医生
import re
from requests_html import HTMLSession
# Fetch the DXY epidemic page and print the JSON array embedded in the
# <script id="getListByCountryTypeService2"> element.
session = HTMLSession()
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?'
r = session.get(url)

# Keep the builtin ``str`` and the ``json`` module name free: rebinding them
# at module level breaks any later ``str(...)`` / ``json.loads(...)`` call.
script = r.html.find('script#getListByCountryTypeService2', first=True)
if script is None:
    raise SystemExit('script#getListByCountryTypeService2 not found in the page')

# Raw string so that ``\[`` / ``\]`` are regex escapes, not (invalid) string
# escapes. The non-greedy match ends at the first "]", which is sufficient
# as long as the embedded array contains no nested arrays.
match = re.search(r"\[(.*?)\]", script.text)
if match is None:
    raise SystemExit('no JSON array found inside the script element')
print(match.group())
我是一名斗者,我正在被一名斗帝追杀,但我一点都不慌,因为他的马没我快,架!!!
7.
@姑娘等等丶,[{
"id": 953,
"createTime": 1580027704000,
"modifyTime": 1580561200000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "6",
"provinceName": "日本",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 20,
"suspectedCount": 0,
"curedCount": 1,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 951002
}, {
"id": 949,
"createTime": 1580027637000,
"modifyTime": 1580495765000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "2",
"provinceName": "泰国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 19,
"suspectedCount": 0,
"curedCount": 5,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 952010
}, {
"id": 950,
"createTime": 1580027655000,
"modifyTime": 1580619404000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "3",
"provinceName": "新加坡",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 18,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 952009
}, {
"id": 954,
"createTime": 1580027721000,
"modifyTime": 1580609979000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "7",
"provinceName": "韩国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 15,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 951004
}, {
"id": 958,
"createTime": 1580027777000,
"modifyTime": 1580620070000,
"tags": "",
"countryType": 2,
"continents": "大洋洲",
"provinceId": "10",
"provinceName": "澳大利亚",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 12,
"suspectedCount": 0,
"curedCount": 2,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 990001
}, {
"id": 1047,
"createTime": 1580167302000,
"modifyTime": 1580784147000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "德国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 12,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "shiweiyi",
"locationId": 963003
}, {
"id": 955,
"createTime": 1580027735000,
"modifyTime": 1580771385000,
"tags": "",
"countryType": 2,
"continents": "北美洲",
"provinceId": "8",
"provinceName": "美国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 11,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 971002
}, {
"id": 951,
"createTime": 1580027668000,
"modifyTime": 1580495813000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "4",
"provinceName": "马来西亚",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 8,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 952007
}, {
"id": 952,
"createTime": 1580027683000,
"modifyTime": 1580516875000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "5",
"provinceName": "法国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 6,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 961002
}, {
"id": 956,
"createTime": 1580027751000,
"modifyTime": 1580539329000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "9",
"provinceName": "越南",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 6,
"suspectedCount": 0,
"curedCount": 1,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 952011
}, {
"id": 1650,
"createTime": 1580276720000,
"modifyTime": 1580621660000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "阿联酋",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 5,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "yuting",
"locationId": 955019
}, {
"id": 959,
"createTime": 1580027795000,
"modifyTime": 1580552879000,
"tags": "",
"countryType": 2,
"continents": "北美洲",
"provinceId": "10",
"provinceName": "加拿大",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 4,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 971001
}, {
"id": 1954,
"createTime": 1580375521000,
"modifyTime": 1580622802000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "印度",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 2,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "gaoh",
"locationId": 953003
}, {
"id": 1957,
"createTime": 1580425693000,
"modifyTime": 1580516912000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "意大利",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 2,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "shiweiyi",
"locationId": 965008
}, {
"id": 2280,
"createTime": 1580464639000,
"modifyTime": 1580495938000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "英国",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 2,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "leimanman",
"locationId": 961007
}, {
"id": 2281,
"createTime": 1580476133000,
"modifyTime": 1580495947000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "俄罗斯",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 2,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "hekaiqi",
"locationId": 964006
}, {
"id": 2870,
"createTime": 1580566796000,
"modifyTime": 1580614212000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "菲律宾",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 2,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 1,
"comment": "",
"sort": 0,
"operator": "hekaiqi",
"locationId": 952008
}, {
"id": 957,
"createTime": 1580027764000,
"modifyTime": 1580495955000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "尼泊尔",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "xuyt",
"locationId": 953005
}, {
"id": 1023,
"createTime": 1580130061000,
"modifyTime": 1580495965000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "柬埔寨",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "hanting",
"locationId": 952003
}, {
"id": 1306,
"createTime": 1580182019000,
"modifyTime": 1580495982000,
"tags": "",
"countryType": 2,
"continents": "亚洲",
"provinceId": "10",
"provinceName": "斯里兰卡",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "zhuotingting",
"locationId": 953007
}, {
"id": 1656,
"createTime": 1580337882000,
"modifyTime": 1580495991000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "芬兰",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "zhuotingting",
"locationId": 962002
}, {
"id": 2293,
"createTime": 1580512517000,
"modifyTime": 1580514491000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "西班牙",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "zhuotingting",
"locationId": 965015
}, {
"id": 2596,
"createTime": 1580516851000,
"modifyTime": 1580516851000,
"tags": "",
"countryType": 2,
"continents": "欧洲",
"provinceId": "10",
"provinceName": "瑞典",
"provinceShortName": "",
"cityName": "",
"confirmedCount": 1,
"suspectedCount": 0,
"curedCount": 0,
"deadCount": 0,
"comment": "",
"sort": 0,
"operator": "leimanman",
"locationId": 962005
}]
最终获得的是这样一个结果。
如果要获得 id 为 953 的那一项下边的 provinceName 的内容,要怎么去操作?(说个思路就行)
但是现在的类型是 str,这个是不是有必要先转成 dict 类型?
8.
@yes_h_ut,百度搜json在线解析,复制进去解析后分析,再百度搜python json解析
我是一名斗者,我正在被一名斗帝追杀,但我一点都不慌,因为他的马没我快,架!!!
9.
@姑娘等等丶,
百度基本上是以bs4库处理。。。
....
# Fetch the page (url1 and headers are defined in the part elided above)
# and parse the UTF-8 decoded body with the lxml parser.
r=requests.get(url1,headers=headers)
bs = BeautifulSoup(r.content.decode("utf-8"), "lxml")
# print(bs.prettify())  # pretty-print the contents of bs
# Collect every <script> tag whose id attribute is "getAreaStat".
bs1=bs.find_all('script',attrs={'id':'getAreaStat'})
得到的结果(遇到的问题):
1.含有标签头[<script id="getAreaStat">,尝试用json1 = re.search("\[(.*?)\]",str1)将标签头去掉,提示类型不对【find_all后的输出如何不带tag】
2.输出的类型是 bs4.element.ResultSet,长度为1,不能用for循环取值。
是否需要将类型转化才能以下面的结构输出:
# Print the index and the counters of every record whose provinceName
# contains the target province. Iteration starts at index 0 so the first
# record is not skipped.
for ix, province in enumerate(decodejson):
    if '浙江省' in province['provinceName']:
        pprint.pprint(str(ix))
        pprint.pprint('death: '+str(province['deadCount']))
        pprint.pprint('confirmedCount: '+str(province['confirmedCount']))
        pprint.pprint('suspectedCount: '+str(province['suspectedCount']))
以上
10.
@yes_h_ut,
#coding=utf-8
# 爬取 丁香园 - 丁香医生
import re
import json
from requests_html import HTMLSession
# Fetch the DXY epidemic page, decode the JSON array embedded in the
# <script id="getListByCountryTypeService2"> element, and print the first
# record together with its provinceName field.
session = HTMLSession()
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?'
r = session.get(url)

# Do not rebind the builtin ``str``; later ``str(...)`` calls would fail.
script = r.html.find('script#getListByCountryTypeService2', first=True)
if script is None:
    raise SystemExit('script#getListByCountryTypeService2 not found in the page')

# Raw string keeps ``\[`` / ``\]`` as regex escapes. The non-greedy match
# ends at the first "]", which is sufficient while the array is flat.
data = re.search(r"\[(.*?)\]", script.text)
if data is None:
    raise SystemExit('no JSON array found inside the script element')

# json.loads turns the matched text into a list of dicts.
records = json.loads(data.group())
print(records[0])  # first element of the array
print(records[0]["provinceName"])  # a field of the JSON object inside the array
我是一名斗者,我正在被一名斗帝追杀,但我一点都不慌,因为他的马没我快,架!!!
11.
@姑娘等等丶,
#encoding='UTF-8
import requests
import os
from bs4 import BeautifulSoup
import re
import json
import pprint
# Scrape per-city figures from the DXY epidemic page: locate the
# <script id="getAreaStat"> element, decode the JSON array embedded in its
# JavaScript text, and print the counters of every city whose name contains
# the target substring.
url1 = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?'
headers = {
    'User-Agent': 'self-defind-user-agent',
    'Cookie': 'name=self-define-cookies-in header',
    'charset': 'utf-8'
}
r = requests.get(url1, headers=headers)
bs = BeautifulSoup(r.content.decode("utf-8"), "lxml")
# print(bs.prettify())  # pretty-print the parsed document

# find_all returns a ResultSet (list-like); the data lives in its first tag.
bs1 = bs.find_all('script', attrs={'id': 'getAreaStat'})
if not bs1:
    raise SystemExit('script#getAreaStat not found in the page')
print(str(type(bs1[0])))
print('............................................')

# .string is the raw JavaScript inside the tag, without the surrounding
# <script> markup.
x = bs1[0].string
start = x.find('[') if x is not None else -1
if start == -1:
    raise SystemExit('no JSON array found inside script#getAreaStat')

# raw_decode parses one complete JSON value from the start of the string
# and ignores the JavaScript that follows it, so no hand-written terminator
# pattern is needed to find the end of the array.
decodejson, _ = json.JSONDecoder().raw_decode(x[start:])
pprint.pprint(str(type(decodejson)))
print('............................................')

# Optional: dump the decoded data to a file for inspection.
# with open('7.txt', 'w', encoding='utf-8') as f:
#     f.write(json.dumps(decodejson, ensure_ascii=False))

# Each top-level record carries a 'cities' list; report every matching city.
for province in decodejson:
    for city in province['cities']:
        if '成都' in city['cityName']:
            pprint.pprint('cityName: '+str(city['cityName']))
            pprint.pprint('confirmedCount: '+str(city['confirmedCount']))
            pprint.pprint('suspectedCount: '+str(city['suspectedCount']))
            pprint.pprint('curedCount: '+str(city['curedCount']))
            pprint.pprint('deadCount: '+str(city['deadCount']))
print('............................................')
谢谢指点。
折腾好久才弄出来,不过总感觉有些操作是没有用的。