This commit is contained in:
2025-08-13 16:02:21 +08:00
commit fa25bfd784
11 changed files with 3403 additions and 0 deletions

144
jd/test.py Normal file
View File

@@ -0,0 +1,144 @@
import time
import random
import json
from DrissionPage import ChromiumPage, ChromiumOptions
# Index of the floor inside result['floors'] that the script reads comments
# from (the third floor, zero-based).
_COMMENT_FLOOR_INDEX = 2


def _click_if_present(browser_page, locator, missing_message):
    """Click the first element matching *locator*, or print *missing_message*.

    After a successful click the function sleeps 2-4 seconds so the page can
    react before the next step.
    """
    element = browser_page.ele(locator)
    if element:
        print(f"找到元素:{element.text}")
        element.click()
        time.sleep(random.uniform(2, 4))
    else:
        print(missing_message)


def _report_comments(body):
    """Print every comment in *body* that carries at least one picture.

    *body* is the captured response body, either an already-decoded object or
    raw JSON text (decoded here).

    Returns True when the comment list was located, False when the expected
    structure is missing. Raises json.JSONDecodeError when *body* is text that
    is not valid JSON.
    """
    data = json.loads(body) if isinstance(body, (str, bytes)) else body
    result = data.get('result') if isinstance(data, dict) else None
    floors = result.get('floors') if isinstance(result, dict) else None
    if not isinstance(floors, list) or len(floors) <= _COMMENT_FLOOR_INDEX:
        print("返回数据中不包含评论楼层信息。")
        return False

    comment_floor = floors[_COMMENT_FLOOR_INDEX]
    comments = comment_floor.get('data') if isinstance(comment_floor, dict) else None
    if not isinstance(comments, list):
        print("未找到有效的评论数据或数据格式异常。")
        return False

    print(f"成功提取到 {len(comments)} 条评论:\n")
    for idx, comment in enumerate(comments, 1):
        comment_info = comment.get('commentInfo', {})
        user_name = comment_info.get('userNickName', '匿名用户')
        comment_text = comment_info.get('commentData', '无评论内容')
        comment_id = comment_info.get('commentId', '评价ID')
        product_id = comment_info.get('productId', '商品ID')
        # Collect every non-empty large-picture URL attached to the comment.
        picture_list = comment_info.get('pictureInfoList', [])
        picture_urls = [pic.get('largePicURL') for pic in picture_list if pic.get('largePicURL')]
        # Only comments that have pictures are reported.
        if picture_urls:
            print(f"{idx} 条评论:")
            print(f"用户名:{user_name}")
            # This value is the comment ID; the label used to say "评分" (score).
            print(f"评价ID:{comment_id}")
            print(f"商品ID{product_id}")
            print(f"评论内容:{comment_text}")
            print(f"图片链接:{picture_urls}\n")
    return True


# Path to the local Chrome executable (replace with the real install path).
chrome_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
# Configure the browser to start from that executable.
options = ChromiumOptions()
options.set_browser_path(chrome_path)
# Create the browser instance.
page = ChromiumPage(options)
try:
    # Open the JD product page.
    page.get('https://item.jd.com/100104238904.html#crumb-wrap')
    while True:  # Keep running until interrupted.
        try:
            # Wait for the page to load (a captcha can be solved by hand here).
            time.sleep(random.uniform(1, 6))
            # Scroll the main page down.
            page.scroll.down(150)
            time.sleep(random.uniform(1, 3))
            # Click through the three tabs in order; a missing tab is only
            # reported, the script carries on regardless.
            _click_if_present(page, 'xpath=//div[contains(text(), "买家赞不绝口")]', "未找到第一个元素")
            _click_if_present(page, 'xpath=//div[contains(text(), "当前商品")]', "未找到第二个元素")
            _click_if_present(page, 'xpath=//div[contains(text(), "视频")]', "未找到第三个元素")
            # Locate the comment popup area.
            popup = page.ele('xpath=//*[@id="rateList"]/div/div[3]')
            if popup:
                # Start listening for requests to the target API endpoint.
                page.listen.start('https://api.m.jd.com/client.action')
                # Scroll the popup until a comment response has been handled
                # or the retry budget is used up.
                max_retries = 5
                retry_count = 0
                success = False
                while retry_count < max_retries and not success:
                    # Scroll by a random number of pixels.
                    scroll_amount = random.randint(1000, 4000)
                    popup.scroll.down(scroll_amount)
                    print(f"弹窗向下滚动了 {scroll_amount} 像素")
                    # Pause after scrolling to mimic a real user.
                    time.sleep(random.uniform(1, 3))
                    # Wait for the next captured request.
                    resp = page.listen.wait(timeout=5)
                    # Guard against an empty post body so the membership test
                    # below cannot raise and abort the whole round.
                    post_data = resp.request.postData if resp else None
                    if post_data and 'getCommentListPage' in post_data:
                        print("成功捕获到新的评论数据请求!")
                        try:
                            # Mark success only once the comment list has
                            # really been found, so the retry loop can stop.
                            success = _report_comments(resp.response.body)
                        except json.JSONDecodeError as je:
                            print("JSON 解析失败:", je)
                        except Exception as e:
                            print("处理评论数据时出错:", e)
                    else:
                        print("未捕获到新的评论数据,继续滚动...")
                    retry_count += 1
                if not success:
                    print("多次滚动后仍未获取到有效评论数据,请检查页面结构或网络请求状态。")
            else:
                print("未找到弹窗元素")
            # Random pause between rounds to mimic a real user.
            wait_time = random.uniform(5, 10)
            print(f"等待 {wait_time:.2f} 秒后继续下一轮操作...")
            time.sleep(wait_time)
        except Exception as inner_e:
            print(f"内部循环发生错误:{inner_e}")
            time.sleep(5)  # Back off briefly before the next round.
except KeyboardInterrupt:
    print("用户中断脚本执行")
except Exception as outer_e:
    print(f"外部异常:{outer_e}")
finally:
    print("正在关闭浏览器...")
    # NOTE(review): the quit call below is disabled in the original, so the
    # browser is not actually closed despite the message above - confirm
    # whether that is intentional before enabling it.
    # page.quit()