Files
tb_pl/jd/test.py
2025-08-13 16:02:21 +08:00

145 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
import random
import time

from DrissionPage import ChromiumPage, ChromiumOptions
# Path to the Chrome executable. Defaults to the standard Windows install
# location; set the CHROME_PATH environment variable to point at a different
# binary without editing this file.
chrome_path = os.environ.get(
    'CHROME_PATH',
    r'C:\Program Files\Google\Chrome\Application\chrome.exe',
)
# Build launch options that pin the browser to that executable.
options = ChromiumOptions()
options.set_browser_path(chrome_path)
# Create the browser page object that the rest of the script drives.
page = ChromiumPage(options)
# Substring that identifies the comment-list call in a request's POST data.
COMMENT_API_MARKER = 'getCommentListPage'


def parse_comment_floor(body, floor_index=2):
    """Return the raw comment list held in one floor of a comment response.

    Args:
        body: The response payload. Either an already-decoded dict or the
            JSON text/bytes of the response (decoded here).
        floor_index: Position of the comment floor inside
            ``result.floors``. The script has always read the third floor,
            hence the default of 2.

    Returns:
        The list stored under the floor's ``data`` key.

    Raises:
        json.JSONDecodeError: ``body`` is text but not valid JSON.
        ValueError: The payload lacks the expected structure; the message
            is the user-facing text the script prints for that case.
    """
    if isinstance(body, (str, bytes, bytearray)):
        body = json.loads(body)
    result = body.get('result') if isinstance(body, dict) else None
    floors = result.get('floors') if isinstance(result, dict) else None
    # Guard the index so a short floor list is reported, not raised as
    # an IndexError.
    if not isinstance(floors, (list, tuple)) or len(floors) <= floor_index:
        raise ValueError("返回数据中不包含评论楼层信息。")
    floor = floors[floor_index]
    data = floor.get('data') if isinstance(floor, dict) else None
    if not isinstance(data, list):
        raise ValueError("未找到有效的评论数据或数据格式异常。")
    return data


def summarize_comment(comment):
    """Flatten one comment entry into the fields this script reports.

    Missing (or null) ``commentInfo`` / ``pictureInfoList`` values are
    treated as empty so a single sparse entry cannot abort a whole batch.

    Returns:
        A dict with ``user_name``, ``comment_text``, ``comment_id``,
        ``product_id`` and ``picture_urls`` (non-empty ``largePicURL``
        values only).
    """
    info = comment.get('commentInfo') or {}
    pictures = info.get('pictureInfoList') or []
    return {
        'user_name': info.get('userNickName', '匿名用户'),
        'comment_text': info.get('commentData', '无评论内容'),
        'comment_id': info.get('commentId', '评价ID'),
        'product_id': info.get('productId', '商品ID'),
        'picture_urls': [
            pic.get('largePicURL') for pic in pictures if pic.get('largePicURL')
        ],
    }


def _report_comments(comments):
    """Print every comment that carries at least one picture URL."""
    print(f"成功提取到 {len(comments)} 条评论:\n")
    for idx, comment in enumerate(comments, 1):
        summary = summarize_comment(comment)
        if not summary['picture_urls']:
            continue
        print(f"{idx} 条评论:")
        print(f"用户名:{summary['user_name']}")
        # The value printed here is the comment ID, so label it as such
        # (it was previously labelled as a rating).
        print(f"评价ID:{summary['comment_id']}")
        print(f"商品ID{summary['product_id']}")
        print(f"评论内容:{summary['comment_text']}")
        print(f"图片链接:{summary['picture_urls']}\n")


def _is_comment_request(packet):
    """Tell whether a captured packet is a comment-list API call."""
    post_data = packet.request.postData
    # NOTE(review): postData is presumably empty or non-string for some
    # packets - confirm against the DrissionPage listener documentation.
    if not post_data:
        return False
    if not isinstance(post_data, str):
        post_data = str(post_data)
    return COMMENT_API_MARKER in post_data


def _click_if_present(page, locator, missing_message):
    """Click the element matched by ``locator``, or print ``missing_message``."""
    element = page.ele(locator)
    if element:
        print(f"找到元素:{element.text}")
        element.click()
        time.sleep(random.uniform(2, 4))
    else:
        print(missing_message)


def _capture_comment_batch(page, popup, max_retries=5):
    """Scroll the popup until one comment response is captured and parsed.

    Makes at most ``max_retries`` scroll attempts and stops at the first
    batch that parses cleanly.

    Returns:
        True if a comment batch was parsed and reported, else False.
    """
    for _ in range(max_retries):
        scroll_amount = random.randint(1000, 4000)
        popup.scroll.down(scroll_amount)
        print(f"弹窗向下滚动了 {scroll_amount} 像素")
        # Pause after scrolling to mimic a human reader.
        time.sleep(random.uniform(1, 3))
        packet = page.listen.wait(timeout=5)
        if not (packet and _is_comment_request(packet)):
            print("未捕获到新的评论数据,继续滚动...")
            continue
        print("成功捕获到新的评论数据请求!")
        try:
            _report_comments(parse_comment_floor(packet.response.body))
        except json.JSONDecodeError as je:
            # Must precede ValueError: JSONDecodeError is a subclass of it.
            print("JSON 解析失败:", je)
        except ValueError as ve:
            print(ve)
        except Exception as e:
            print("处理评论数据时出错:", e)
        else:
            return True
    return False


try:
    # Open the JD product page.
    page.get('https://item.jd.com/100104238904.html#crumb-wrap')
    while True:  # Keep scraping until interrupted.
        try:
            # Give the page time to load (a captcha can be solved by hand).
            time.sleep(random.uniform(1, 6))
            page.scroll.down(150)
            time.sleep(random.uniform(1, 3))
            # Walk through the review tabs in order.
            _click_if_present(
                page,
                'xpath=//div[contains(text(), "买家赞不绝口")]',
                "未找到第一个元素",
            )
            _click_if_present(
                page,
                'xpath=//div[contains(text(), "当前商品")]',
                "未找到第二个元素",
            )
            _click_if_present(
                page,
                'xpath=//div[contains(text(), "视频")]',
                "未找到第三个元素",
            )
            # Locate the scrollable review popup.
            popup = page.ele('xpath=//*[@id="rateList"]/div/div[3]')
            if popup:
                # Listen only from here on, so that just the requests
                # triggered by scrolling the popup are captured.
                page.listen.start('https://api.m.jd.com/client.action')
                if not _capture_comment_batch(page, popup):
                    print("多次滚动后仍未获取到有效评论数据,请检查页面结构或网络请求状态。")
            else:
                print("未找到弹窗元素")
            # Random pause between rounds to mimic a human user.
            wait_time = random.uniform(5, 10)
            print(f"等待 {wait_time:.2f} 秒后继续下一轮操作...")
            time.sleep(wait_time)
        except Exception as inner_e:
            # Best-effort loop: report the failure, back off, try again.
            print(f"内部循环发生错误:{inner_e}")
            time.sleep(5)
except KeyboardInterrupt:
    print("用户中断脚本执行")
except Exception as outer_e:
    print(f"外部异常:{outer_e}")
finally:
    print("正在关闭browser...".replace("browser", "浏览器"))
    # NOTE(review): the message above says the browser is being closed, but
    # the quit call is disabled, so the browser stays open after the script
    # ends. Re-enable it if that is not intended.
    # page.quit()