1
This commit is contained in:
144
jd/test.py
Normal file
144
jd/test.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import time
|
||||
import random
|
||||
import json
|
||||
from DrissionPage import ChromiumPage, ChromiumOptions
|
||||
|
||||
# Path to the local Chrome executable (replace with the actual install path).
chrome_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'

# JD product page that the script opens.
PRODUCT_URL = 'https://item.jd.com/100104238904.html#crumb-wrap'
# Endpoint whose traffic is captured while the review popup is scrolled.
LISTEN_TARGET = 'https://api.m.jd.com/client.action'
# Marker looked for in the captured request's post data.
COMMENT_FUNCTION_ID = 'getCommentListPage'
# Position of the comment floor inside result.floors (0-based).
COMMENT_FLOOR_INDEX = 2
# Maximum number of scroll attempts per round.
MAX_SCROLL_RETRIES = 5


def is_comment_request(post_data):
    """Return True when *post_data* mentions the comment-list function id.

    Falsy values (``None``, ``False``, ``''``) yield False instead of raising,
    so a captured request without a usable body cannot abort the round.
    Non-string values are matched against their ``str()`` form.
    """
    if not post_data:
        return False
    return COMMENT_FUNCTION_ID in str(post_data)


def parse_response_body(body):
    """Return *body* as a decoded JSON object.

    ``str``/``bytes`` bodies are decoded with ``json.loads``; anything else is
    assumed to be already decoded and is returned unchanged.

    Raises:
        json.JSONDecodeError: if a textual body is not valid JSON.
    """
    if isinstance(body, (str, bytes, bytearray)):
        return json.loads(body)
    return body


def find_comment_floor(payload, floor_index=COMMENT_FLOOR_INDEX):
    """Return ``payload['result']['floors'][floor_index]`` or None.

    None is returned when *payload* is not a dict, when ``result``/``floors``
    are missing or have an unexpected type, or when the floor list is too
    short for *floor_index*.
    """
    if not isinstance(payload, dict):
        return None
    result = payload.get('result')
    if not isinstance(result, dict):
        return None
    floors = result.get('floors')
    if not isinstance(floors, list):
        return None
    try:
        return floors[floor_index]
    except IndexError:
        return None


def build_comment_record(entry):
    """Flatten one raw comment entry into a plain dict.

    The returned dict has the keys ``user_name``, ``comment_text``,
    ``comment_id``, ``product_id`` and ``picture_urls`` (non-empty
    ``largePicURL`` values only). Missing fields fall back to the same
    placeholder strings the script has always used.
    """
    info = entry.get('commentInfo') or {}
    pictures = info.get('pictureInfoList') or []
    return {
        'user_name': info.get('userNickName', '匿名用户'),
        'comment_text': info.get('commentData', '无评论内容'),
        'comment_id': info.get('commentId', '评价ID'),
        'product_id': info.get('productId', '商品ID'),
        'picture_urls': [
            pic.get('largePicURL') for pic in pictures if pic.get('largePicURL')
        ],
    }


def report_comment_response(body):
    """Print every comment with pictures found in *body*.

    Returns:
        True when a comment list was extracted (even if no comment carried
        pictures), False when the payload was unusable. Errors are reported on
        stdout rather than raised, keeping the scraper best-effort.
    """
    try:
        payload = parse_response_body(body)
        floor = find_comment_floor(payload)
        if floor is None:
            print("返回数据中不包含评论楼层信息。")
            return False

        entries = floor.get('data') if isinstance(floor, dict) else None
        if not isinstance(entries, list):
            print("未找到有效的评论数据或数据格式异常。")
            return False

        print(f"成功提取到 {len(entries)} 条评论:\n")
        for idx, entry in enumerate(entries, 1):
            record = build_comment_record(entry)
            # Only comments that carry at least one picture are reported.
            if not record['picture_urls']:
                continue
            print(f"第 {idx} 条评论:")
            print(f"用户名:{record['user_name']}")
            # The original labelled this line as a rating although it prints the id.
            print(f"评价ID:{record['comment_id']}")
            print(f"商品ID:{record['product_id']}")
            print(f"评论内容:{record['comment_text']}")
            print(f"图片链接:{record['picture_urls']}\n")
        return True
    except json.JSONDecodeError as je:
        print("JSON 解析失败:", je)
    except Exception as e:
        print("处理评论数据时出错:", e)
    return False


def _click_div_containing(page, text, missing_message):
    """Click the first <div> whose text contains *text*, pausing afterwards.

    Prints *missing_message* when no such element is found.
    """
    element = page.ele(f'xpath=//div[contains(text(), "{text}")]')
    if element:
        print(f"找到元素:{element.text}")
        element.click()
        time.sleep(random.uniform(2, 4))
    else:
        print(missing_message)


def _scroll_until_comments(page, popup, max_retries=MAX_SCROLL_RETRIES):
    """Scroll *popup* until a comment response is captured and parsed.

    Every attempt counts towards *max_retries*, so the loop is always bounded.
    Returns True as soon as one response yields a comment list, else False.
    """
    for _ in range(max_retries):
        scroll_amount = random.randint(1000, 4000)
        popup.scroll.down(scroll_amount)
        print(f"弹窗向下滚动了 {scroll_amount} 像素")

        # Pause after scrolling to mimic a human reader.
        time.sleep(random.uniform(1, 3))

        resp = page.listen.wait(timeout=5)
        if resp and is_comment_request(resp.request.postData):
            print("成功捕获到新的评论数据请求!")
            if report_comment_response(resp.response.body):
                return True
        else:
            print("未捕获到新的评论数据,继续滚动...")
    return False


def _run_round(page):
    """Run one pass: scroll, open the review popup, collect one comment page."""
    # Give the page time to load (a captcha can be solved by hand meanwhile).
    time.sleep(random.uniform(1, 6))

    page.scroll.down(150)
    time.sleep(random.uniform(1, 3))

    _click_div_containing(page, "买家赞不绝口", "未找到第一个元素")
    _click_div_containing(page, "当前商品", "未找到第二个元素")
    _click_div_containing(page, "视频", "未找到第三个元素")

    popup = page.ele('xpath=//*[@id="rateList"]/div/div[3]')
    if popup:
        page.listen.start(LISTEN_TARGET)
        if not _scroll_until_comments(page, popup):
            print("多次滚动后仍未获取到有效评论数据,请检查页面结构或网络请求状态。")
    else:
        print("未找到弹窗元素")

    # Random idle time between rounds.
    wait_time = random.uniform(5, 10)
    print(f"等待 {wait_time:.2f} 秒后继续下一轮操作...")
    time.sleep(wait_time)


def main():
    """Open the JD product page and keep collecting comments until interrupted."""
    options = ChromiumOptions()
    options.set_browser_path(chrome_path)
    page = ChromiumPage(options)

    try:
        page.get(PRODUCT_URL)
        while True:
            try:
                _run_round(page)
            except Exception as inner_e:
                print(f"内部循环发生错误:{inner_e}")
                time.sleep(5)  # Back off briefly before the next round.
    except KeyboardInterrupt:
        print("用户中断脚本执行")
    except Exception as outer_e:
        print(f"外部异常:{outer_e}")
    finally:
        print("正在关闭浏览器...")
        # NOTE(review): the original keeps the quit call disabled, so the
        # browser stays open despite the message above; confirm intent.
        # page.quit()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user