1

2025-08-13 16:02:21 +08:00
commit fa25bfd784
11 changed files with 3403 additions and 0 deletions
--- a/jd/test.py
+++ b/jd/test.py
@@ -0,0 +1,144 @@
+import time
+import random
+import json
+from DrissionPage import ChromiumPage, ChromiumOptions
+
+# Path to the browser executable (replace with the actual local Chrome install path)
+chrome_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
+
+# Configure the browser options so the browser at the path above is used
+options = ChromiumOptions()
+options.set_browser_path(chrome_path)
+
+# Create the browser page instance from those options
+page = ChromiumPage(options)
+
+try:
+    # 打开京东商品页面
+    page.get('https://item.jd.com/100104238904.html#crumb-wrap')
+
+    while True:  # 持续运行循环
+        try:
+            # 等待页面加载完成（可手动处理验证码）
+            time.sleep(random.uniform(1, 6))
+
+            # 向下滚动主页面
+            page.scroll.down(150)
+            time.sleep(random.uniform(1, 3))
+
+            # 定位并点击“买家赞不绝口”
+            element1 = page.ele('xpath=//div[contains(text(), "买家赞不绝口")]')
+            if element1:
+                print(f"找到元素：{element1.text}")
+                element1.click()
+                time.sleep(random.uniform(2, 4))
+            else:
+                print("未找到第一个元素")
+
+            # 定位并点击“当前商品”
+            element2 = page.ele('xpath=//div[contains(text(), "当前商品")]')
+            if element2:
+                print(f"找到元素：{element2.text}")
+                element2.click()
+                time.sleep(random.uniform(2, 4))
+            else:
+                print("未找到第二个元素")
+            # 定位并点击“图/视频”
+            element3 = page.ele('xpath=//div[contains(text(), "视频")]')
+            if element3:
+                print(f"找到元素：{element3.text}")
+                element3.click()
+                time.sleep(random.uniform(2, 4))
+            else:
+                print("未找到第三个元素")
+
+            # 定位弹窗区域
+            popup = page.ele('xpath=//*[@id="rateList"]/div/div[3]')
+            if popup:
+                # 开始监听目标接口请求
+                page.listen.start('https://api.m.jd.com/client.action')
+
+                # 循环滚动直到成功获取新评论数据
+                max_retries = 5  # 最大尝试次数
+                retry_count = 0
+                success = False
+
+                while retry_count < max_retries and not success:
+                    # 随机滚动一定像素
+                    scroll_amount = random.randint(1000, 4000)
+                    popup.scroll.down(scroll_amount)
+                    print(f"弹窗向下滚动了 {scroll_amount} 像素")
+
+                    # 滚动后等待一段时间，模拟真实用户行为
+                    time.sleep(random.uniform(1, 3))
+
+                    # 等待新的评论数据请求
+                    resp = page.listen.wait(timeout=5)
+
+                    if resp and 'getCommentListPage' in resp.request.postData:
+                        print("成功捕获到新的评论数据请求！")
+                        # 解析 resp.body 中的 JSON 数据
+                        try:
+                            json_data = resp.response.body
+
+                            # 提取评论楼层（第三个楼层）
+                            if 'result' in json_data and 'floors' in json_data['result']:
+                                comment_floor = json_data['result']['floors'][2]  # 索引从0开始
+
+                                if 'data' in comment_floor and isinstance(comment_floor['data'], list):
+                                    comments = comment_floor['data']
+
+                                    print(f"成功提取到 {len(comments)} 条评论：\n")
+
+                                    for idx, comment in enumerate(comments, 1):
+                                        comment_info = comment.get('commentInfo', {})
+
+                                        user_name = comment_info.get('userNickName', '匿名用户')
+                                        comment_text = comment_info.get('commentData', '无评论内容')
+                                        commentId = comment_info.get('commentId', '评价ID')
+                                        productId = comment_info.get('productId', '商品ID')
+                                        # 提取所有图片链接
+                                        picture_list = comment_info.get('pictureInfoList', [])
+                                        picture_urls = [pic.get('largePicURL') for pic in picture_list if pic.get('largePicURL')]
+
+                                        if picture_urls:
+                                            print(f"第 {idx} 条评论：")
+                                            print(f"用户名：{user_name}")
+                                            print(f"评分：{commentId}")
+                                            print(f"商品ID：{productId}")
+                                            print(f"评论内容：{comment_text}")
+                                            print(f"图片链接：{picture_urls}\n")
+                                else:
+                                    print("未找到有效的评论数据或数据格式异常。")
+                            else:
+                                print("返回数据中不包含评论楼层信息。")
+                        except json.JSONDecodeError as je:
+                            print("JSON 解析失败:", je)
+                        except Exception as e:
+                            print("处理评论数据时出错：", e)
+                    else:
+                        print("未捕获到新的评论数据，继续滚动...")
+                        retry_count += 1
+
+                if not success:
+                    print("多次滚动后仍未获取到有效评论数据，请检查页面结构或网络请求状态。")
+            else:
+                print("未找到弹窗元素")
+
+            # 添加随机等待时间以模拟真实用户行为
+            wait_time = random.uniform(5, 10)
+            print(f"等待 {wait_time:.2f} 秒后继续下一轮操作...")
+            time.sleep(wait_time)
+
+        except Exception as inner_e:
+            print(f"内部循环发生错误：{inner_e}")
+            time.sleep(5)  # 出错后稍作等待再继续
+
+except KeyboardInterrupt:
+    print("用户中断脚本执行")
+except Exception as outer_e:
+    print(f"外部异常：{outer_e}")
+finally:
+    # 关闭浏览器
+    print("正在关闭浏览器...")
+    # page.quit()