【背景】 孵化新功能“应用管控”需要大量应用评分数据,但目前没有相关数据;BG侧规划新功能时也需要这些数据作为支撑。
【预研】 调研发现友商已有大量评分数据,但数据采用无限级分类,人工收集难度大,因此计划编写爬虫程序获取数据。
【爬虫编码】
# Build the request body and fetch a single level of data.
def request_data(selector, timeout=30):
    """Fetch the node list for one level of the application tree.

    Sends a PUT request carrying a ``getCssTreeNode`` query to the ApiEntry
    endpoint and returns the node list found in the JSON reply.

    Args:
        selector: Value sent as ``param.parameter.selector`` in the request
            body. Callers in this file pass ``[]`` for the top level and a
            node's own ``selector`` field for deeper levels.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl forever.

    Returns:
        The value stored at ``Data -> data -> cssTreeNodeList`` in the
        response JSON.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
        KeyError: The response JSON does not contain the expected keys.
    """
    url = 'https://epvqdk.manage.trendmicro.com/WebApp/OSCE_iAC/OsceIac/ApiEntry'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        # COOKIE is expected to be defined at module level elsewhere in the file.
        'Cookie': COOKIE,
    }
    payload = {
        "param": {
            "category": "rule",
            "function": "getCssTreeNode",
            "parameter": {
                "selector": selector,
                "showOption": 0,
                "upperLevel": 0,
                "lowerLevel": 0,
            },
            "body": {},
        }
    }
    response = requests.put(url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()['Data']['data']['cssTreeNodeList']
# Define the set of keys used as Excel columns.
def get_excel_keys():
    """Return the ordered list of keys that make up one Excel row.

    The same list is used as the header row and to pick values out of each
    node dict, so its order is the column order of the worksheet. A new
    list is built on every call.
    """
    leading_columns = ['path_name', 'name', 'airScore', 'major', 'minor']
    usage_columns = [
        'globalUsage',
        'apacUsage',
        'emeaUsage',
        'jpnUsage',
        'larUsage',
        'nabuUsage',
    ]
    trailing_columns = ['ordering', 'siblingCount', 'childCount']
    return leading_columns + usage_columns + trailing_columns
# Recursively fetch the data of every nested level.
def recursion_selector(item, row, parent_name):
    """Collect the rows of all descendants of ``item`` into ``row``.

    For a node whose ``childCount`` is greater than zero, the children are
    fetched with ``request_data(item['selector'])``. Each child gets a
    ``path_name`` of the form ``"<parent path> -> <child name>"``, is
    appended to ``row`` as a list of values in ``get_excel_keys()`` order,
    and is then descended into, so a child's row always precedes the rows
    of its own descendants.

    Args:
        item: Node dict; must contain ``childCount`` and, when it has
            children, ``selector``.
        row: List of row lists, extended in place.
        parent_name: Path of ``item``, used as the prefix for its children.

    Returns:
        The same ``row`` list that was passed in.

    Raises:
        KeyError: A fetched child lacks one of the keys in
            ``get_excel_keys()``.
    """
    # Leaf node: nothing to fetch, no request is made.
    if not item['childCount'] > 0:
        return row

    # The column list is identical for every child, so resolve it once per
    # level instead of once per child.
    excel_keys = get_excel_keys()
    for child in request_data(item['selector']):
        # Coerce both sides so a non-string name cannot break the join.
        child["path_name"] = str(parent_name) + " -> " + str(child['name'])
        row.append([child[key] for key in excel_keys])
        recursion_selector(child, row, child["path_name"])
    return row
# Write the collected data into an Excel worksheet.
def save_excel_file(output_path='output.xlsx'):
    """Crawl the whole tree and save it as an Excel workbook.

    Fetches the top-level nodes with ``request_data([])``, appends one row
    per node followed by the rows of all its descendants (via
    ``recursion_selector``), then writes a header row plus all data rows to
    the active sheet of a new workbook and saves it.

    Args:
        output_path: Destination file for the workbook. Defaults to
            ``'output.xlsx'``, the path that was previously hard-coded.
    """
    # Column order is fixed for the whole run; resolve it once.
    excel_keys = get_excel_keys()

    rows = []
    for item in request_data([]):
        parent_name = item['name']
        # A top-level node's path is just its own name.
        item["path_name"] = parent_name
        rows.append([item[key] for key in excel_keys])
        # Descend into the children; their rows are appended to `rows`.
        recursion_selector(item, rows, parent_name)

    # Workbook is presumably openpyxl's; it is imported elsewhere in the file.
    wb = Workbook()
    ws = wb.active
    # Header row: the keys themselves serve as column titles.
    ws.append(excel_keys)
    for data_row in rows:
        ws.append(data_row)
    wb.save(output_path)
【数据爬取结果】 |