"""Capture JD.com product reviews by listening to the comment API.

The script opens a product page in a locally installed Chrome (driven by
DrissionPage), opens the review popup, scrolls it to trigger further comment
requests and prints every captured review that carries at least one picture.
It keeps running rounds until interrupted with Ctrl+C.
"""

import time
import random
import json

from DrissionPage import ChromiumPage, ChromiumOptions

# Path to the local Chrome executable (replace with the actual install path).
chrome_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'

# Product page to open and the API endpoint whose traffic is captured.
PRODUCT_URL = 'https://item.jd.com/100104238904.html#crumb-wrap'
COMMENT_API_URL = 'https://api.m.jd.com/client.action'

# Marker that must appear in the request's post data for a comment request.
COMMENT_FUNCTION_ID = 'getCommentListPage'

# Zero-based position of the comment floor inside result['floors'].
COMMENT_FLOOR_INDEX = 2

# Maximum number of popup scrolls per round while waiting for comment data.
MAX_SCROLL_ATTEMPTS = 5

# (locator, message printed when the element is missing), clicked in order.
TAB_TARGETS = (
    ('xpath=//div[contains(text(), "买家赞不绝口")]', "未找到第一个元素"),
    ('xpath=//div[contains(text(), "当前商品")]', "未找到第二个元素"),
    ('xpath=//div[contains(text(), "视频")]', "未找到第三个元素"),
)

# Scrollable area of the review popup.
POPUP_XPATH = 'xpath=//*[@id="rateList"]/div/div[3]'


def _click_if_present(page, locator, missing_message):
    """Click the element matched by *locator*, or print *missing_message*."""
    element = page.ele(locator)
    if not element:
        print(missing_message)
        return
    print(f"找到元素:{element.text}")
    element.click()
    time.sleep(random.uniform(2, 4))


def _is_comment_request(packet):
    """Return True when *packet* is a captured comment-list request."""
    if not packet:
        # The wait timed out and nothing was captured.
        return False
    post_data = packet.request.postData
    # Post data can be missing, and may not be a plain string
    # (NOTE(review): presumably parsed JSON in some cases - confirm), so it
    # is coerced to text instead of letting the `in` test raise TypeError.
    return bool(post_data) and COMMENT_FUNCTION_ID in str(post_data)


def _parse_body(body):
    """Return the response body as parsed JSON.

    A body that is still raw text or bytes is decoded with ``json.loads``;
    anything else is assumed to be parsed already and is returned unchanged.

    Raises:
        json.JSONDecodeError: if a textual body is not valid JSON.
    """
    if isinstance(body, (bytes, bytearray)):
        body = body.decode('utf-8')
    if isinstance(body, str):
        return json.loads(body)
    return body


def _extract_comments(payload):
    """Return the list of comment entries in *payload*, or None.

    Prints a diagnostic message and returns None when the payload does not
    have the expected ``result -> floors[COMMENT_FLOOR_INDEX] -> data`` shape.
    """
    result = payload.get('result') if isinstance(payload, dict) else None
    if not isinstance(result, dict) or 'floors' not in result:
        print("返回数据中不包含评论楼层信息。")
        return None

    floors = result['floors']
    floor = None
    if isinstance(floors, list) and len(floors) > COMMENT_FLOOR_INDEX:
        floor = floors[COMMENT_FLOOR_INDEX]
    if isinstance(floor, dict) and isinstance(floor.get('data'), list):
        return floor['data']

    print("未找到有效的评论数据或数据格式异常。")
    return None


def _print_comment(index, comment):
    """Print one comment entry; entries without pictures are skipped."""
    # `or` also covers keys that are present but null.
    info = comment.get('commentInfo') or {}
    user_name = info.get('userNickName', '匿名用户')
    comment_text = info.get('commentData', '无评论内容')
    comment_id = info.get('commentId', '评价ID')
    product_id = info.get('productId', '商品ID')

    picture_list = info.get('pictureInfoList') or []
    picture_urls = [
        pic.get('largePicURL') for pic in picture_list if pic.get('largePicURL')
    ]
    if not picture_urls:
        return

    print(f"第 {index} 条评论:")
    print(f"用户名:{user_name}")
    # This line used to be labelled as the rating although it shows the ID.
    print(f"评价ID:{comment_id}")
    print(f"商品ID:{product_id}")
    print(f"评论内容:{comment_text}")
    print(f"图片链接:{picture_urls}\n")


def _collect_comments(page, popup):
    """Scroll *popup* until one comment response has been printed.

    Returns:
        True once a comment response was captured and printed, False when
        MAX_SCROLL_ATTEMPTS scrolls produced no usable data.
    """
    for _ in range(MAX_SCROLL_ATTEMPTS):
        # Scroll by a random amount to trigger loading of more comments.
        scroll_amount = random.randint(1000, 4000)
        popup.scroll.down(scroll_amount)
        print(f"弹窗向下滚动了 {scroll_amount} 像素")

        # Pause after scrolling to mimic a real user.
        time.sleep(random.uniform(1, 3))

        packet = page.listen.wait(timeout=5)
        if not _is_comment_request(packet):
            print("未捕获到新的评论数据,继续滚动...")
            continue

        print("成功捕获到新的评论数据请求!")
        try:
            comments = _extract_comments(_parse_body(packet.response.body))
            if comments is None:
                continue
            print(f"成功提取到 {len(comments)} 条评论:\n")
            for index, comment in enumerate(comments, 1):
                _print_comment(index, comment)
            return True
        except json.JSONDecodeError as je:
            print("JSON 解析失败:", je)
        except Exception as e:
            # Best effort: one malformed response must not end the round.
            print("处理评论数据时出错:", e)
    return False


def _run_round(page):
    """Run one round: open the review popup, capture comments, then wait."""
    # Give the page time to load (a captcha can be solved manually here).
    time.sleep(random.uniform(1, 6))

    page.scroll.down(150)
    time.sleep(random.uniform(1, 3))

    for locator, missing_message in TAB_TARGETS:
        _click_if_present(page, locator, missing_message)

    popup = page.ele(POPUP_XPATH)
    if popup:
        # Start listening for requests to the comment API before scrolling.
        page.listen.start(COMMENT_API_URL)
        if not _collect_comments(page, popup):
            print("多次滚动后仍未获取到有效评论数据,请检查页面结构或网络请求状态。")
    else:
        print("未找到弹窗元素")

    # Random pause between rounds to mimic a real user.
    wait_time = random.uniform(5, 10)
    print(f"等待 {wait_time:.2f} 秒后继续下一轮操作...")
    time.sleep(wait_time)


def main():
    """Launch the browser and run capture rounds until interrupted."""
    options = ChromiumOptions()
    options.set_browser_path(chrome_path)
    page = ChromiumPage(options)

    try:
        page.get(PRODUCT_URL)
        while True:
            try:
                _run_round(page)
            except Exception as inner_e:
                print(f"内部循环发生错误:{inner_e}")
                time.sleep(5)  # back off briefly before the next round
    except KeyboardInterrupt:
        print("用户中断脚本执行")
    except Exception as outer_e:
        print(f"外部异常:{outer_e}")
    finally:
        print("正在关闭浏览器...")
        # The message above was printed while this call was commented out,
        # which left the browser running after the script ended.
        page.quit()


if __name__ == "__main__":
    main()