1
This commit is contained in:
154
jd/logistics.py
Normal file
154
jd/logistics.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import time
|
||||
import json
|
||||
import re
|
||||
from DrissionPage import ChromiumPage, ChromiumOptions
|
||||
|
||||
# Short link of the logistics tracking page this script inspects.
TRACKING_URL = "https://3.cn/2t-Iibig"

# Location of the Chrome executable that DrissionPage should drive.
CHROME_PATH = r'C:\Program Files\Google\Chrome\Application\chrome.exe'

# Build the launch options and point them at the Chrome binary above.
options = ChromiumOptions()
options.set_browser_path(CHROME_PATH)

# Create the page object (browser instance) used by the rest of the script.
page = ChromiumPage(options)
|
||||
|
||||
# Main exploration flow. Three independent strategies are tried against the
# tracking page and everything found is printed for manual inspection:
#   1. look for labelled DOM elements,
#   2. capture network traffic and dump responses that look like data APIs,
#   3. regex-scan the page HTML for embedded JSON / labelled values.
# Reads the module-level `page` and `TRACKING_URL`; the browser is
# deliberately left open at the end so it can be inspected by hand.
try:
    # Start capturing BEFORE navigating: the requests that carry the
    # logistics data are normally fired while the page loads, so a listener
    # started afterwards would miss them.
    print("开始监听网络请求...")
    page.listen.start()

    print("正在打开物流追踪页面...")
    page.get(TRACKING_URL)

    # Give client-side rendering time to finish.
    time.sleep(5)

    print("\n=== 方法1: 尝试从页面元素提取信息 ===")

    # Elements whose own text mentions the waybill number label.
    waybill_elements = page.eles('xpath=//*[contains(text(), "运单号")]')
    if waybill_elements:
        print(f"找到运单号相关元素: {len(waybill_elements)} 个")
        for elem in waybill_elements:
            print(f" 文本: {elem.text}")
            # The value often sits next to the label, so show the parent too.
            parent = elem.parent()
            if parent:
                print(f" 父元素文本: {parent.text[:100]}")

    # Elements whose own text mentions the carrier label.
    carrier_elements = page.eles('xpath=//*[contains(text(), "承运人")]')
    if carrier_elements:
        print(f"\n找到承运人相关元素: {len(carrier_elements)} 个")
        for elem in carrier_elements:
            print(f" 文本: {elem.text}")

    print("\n=== 方法2: 监听网络请求,查找数据接口 ===")

    # Scroll to trigger any lazily loaded requests.
    page.scroll.down(500)
    time.sleep(3)
    page.scroll.to_bottom()

    # Drain every packet captured so far (page load + scrolling); the
    # iterator ends once no new packet arrives within the timeout.
    all_responses = list(page.listen.steps(timeout=5))
    page.listen.stop()
    print(f"\n共监听到 {len(all_responses)} 个请求")

    # URL fragments that hint at a logistics data endpoint.
    keywords = ['track', 'logistics', 'waybill', 'express', 'delivery', '3.cn', 'jd.com', 'json', 'api']

    for idx, resp in enumerate(all_responses, start=1):
        url = getattr(resp, 'url', '') or ''
        print(f"\n请求 {idx}:")
        print(f" URL: {url[:150]}")

        url_lower = url.lower()
        if not any(keyword in url_lower for keyword in keywords):
            continue

        print(" ⭐ 可能相关的请求!")
        try:
            body = getattr(getattr(resp, 'response', None), 'body', None)
            if isinstance(body, (dict, list)):
                # Already-parsed JSON (object or array).
                print(f" 响应数据 (前500字符): {str(body)[:500]}")
                print(" 完整的 JSON 数据:")
                print(json.dumps(body, indent=2, ensure_ascii=False)[:1000])
            elif isinstance(body, str):
                print(f" 响应数据 (前500字符): {body[:500]}")
                # The text may still be JSON served with a non-JSON type.
                try:
                    json_data = json.loads(body)
                except ValueError:
                    # Not JSON (json.JSONDecodeError is a ValueError);
                    # the raw text above is all there is to show.
                    pass
                else:
                    print(" 解析后的 JSON (前1000字符):")
                    print(json.dumps(json_data, indent=2, ensure_ascii=False)[:1000])
        except Exception as e:
            # Best effort: one unreadable response must not stop the scan.
            print(f" 解析响应时出错: {e}")

    print("\n=== 方法3: 提取页面 HTML 中的 JSON 数据 ===")

    html = page.html

    # Candidate locations of embedded data inside <script> tags.
    json_patterns = [
        r'window\.__INITIAL_STATE__\s*=\s*({.+?});',
        r'var\s+trackData\s*=\s*({.+?});',
        r'const\s+trackingInfo\s*=\s*({.+?});',
        r'data\s*:\s*({.+?})',
        r'"waybillNo"[:\s]+"([^"]+)"',
        r'"trackingNumber"[:\s]+"([^"]+)"',
    ]

    for pattern in json_patterns:
        matches = re.findall(pattern, html, re.DOTALL)
        if matches:
            print(f"\n找到匹配模式 {pattern}:")
            for match in matches[:3]:  # show only the first three hits
                print(f" 匹配: {str(match)[:200]}")

    print("\n=== 尝试提取页面中的所有文本内容 ===")
    # Reuse the HTML fetched above instead of requesting it a second time.
    page_text = html

    # Waybill number: digits following the label.
    waybill_pattern = r'运单号[:\s]*(\d+)'
    waybill_matches = re.findall(waybill_pattern, page_text)
    if waybill_matches:
        print(f"找到运单号: {waybill_matches}")

    # Carrier name. The lookahead keeps this from matching the
    # "国内承运人电话" (carrier phone) label, which is handled below.
    carrier_pattern = r'国内承运人(?!电话)[:\s]*([^\s<]+)'
    carrier_matches = re.findall(carrier_pattern, page_text)
    if carrier_matches:
        print(f"找到承运人: {carrier_matches}")

    # Carrier phone number.
    phone_pattern = r'国内承运人电话[:\s]*(\d+)'
    phone_matches = re.findall(phone_pattern, page_text)
    if phone_matches:
        print(f"找到电话: {phone_matches}")

    print("\n=== 等待用户查看页面 ===")
    print("页面已打开,请手动检查浏览器中的网络请求(F12 -> Network),查找包含物流数据的 API")
    print("按 Enter 键继续或等待 60 秒后自动关闭...")

    try:
        input()
    except EOFError:
        # No interactive stdin (e.g. piped or scheduled run): fall back to
        # a fixed 60 s pause. With a terminal attached, input() blocks
        # until Enter is pressed. Ctrl-C is intentionally NOT caught here
        # so it reaches the KeyboardInterrupt handler below.
        time.sleep(60)

except KeyboardInterrupt:
    print("\n用户中断脚本执行")
except Exception as e:
    # Top-level boundary: report the failure with a full traceback.
    print(f"\n发生错误: {e}")
    import traceback
    traceback.print_exc()
finally:
    print("\n脚本执行完成,浏览器保持打开状态用于调试")
    # The browser is intentionally left running for debugging;
    # call page.quit() here if it should be closed automatically.
|
||||
|
||||
Reference in New Issue
Block a user