爬虫写到论坛网站
This commit is contained in:
39
pom.xml
39
pom.xml
@@ -77,6 +77,25 @@
|
|||||||
<artifactId>UserAgentUtils</artifactId>
|
<artifactId>UserAgentUtils</artifactId>
|
||||||
<version>1.21</version>
|
<version>1.21</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>mysql</groupId>
|
||||||
|
<artifactId>mysql-connector-java</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mybatis.spring.boot</groupId>
|
||||||
|
<artifactId>mybatis-spring-boot-starter</artifactId>
|
||||||
|
<version>2.2.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.squareup.okhttp3</groupId>
|
||||||
|
<artifactId>okhttp</artifactId>
|
||||||
|
<version>3.6.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.projectlombok</groupId>
|
||||||
|
<artifactId>lombok</artifactId>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
@@ -120,6 +139,26 @@
|
|||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.jetbrains.kotlin</groupId>
|
||||||
|
<artifactId>kotlin-maven-plugin</artifactId>
|
||||||
|
<version>1.6.10</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>compile</id>
|
||||||
|
<phase>process-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sourceDirs>
|
||||||
|
<source>src/main/java</source>
|
||||||
|
<source>target/generated-sources/annotations</source>
|
||||||
|
</sourceDirs>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
package cn.van333.wxsend;
|
package cn.van333.wxsend;
|
||||||
|
|
||||||
|
import org.mybatis.spring.annotation.MapperScan;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
|
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
@EnableWebMvc
|
@EnableWebMvc
|
||||||
|
@MapperScan("cn.van333.wxsend.business.mapper")
|
||||||
public class WxSendApplication {
|
public class WxSendApplication {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
package cn.van333.wxsend.business.model;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
import javax.annotation.Resource;
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Leo
|
||||||
|
* @version 1.0
|
||||||
|
* @create 2024/4/29 下午3:05
|
||||||
|
* @description:
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
@Resource
|
||||||
|
public class FlarumDiscussion {
|
||||||
|
|
||||||
|
private Integer id;
|
||||||
|
private String title;
|
||||||
|
private int commentCount;
|
||||||
|
private int participantCount;
|
||||||
|
private int postNumberIndex;
|
||||||
|
private Date createdAt;
|
||||||
|
private Integer userId;
|
||||||
|
private Integer firstPostId;
|
||||||
|
private Date lastPostedAt;
|
||||||
|
private Integer lastPostedUserId;
|
||||||
|
private Integer lastPostId;
|
||||||
|
private Integer lastPostNumber;
|
||||||
|
private Date hiddenAt;
|
||||||
|
private Integer hiddenUserId;
|
||||||
|
private String slug;
|
||||||
|
private boolean isPrivate;
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
package cn.van333.wxsend.business.model;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
import java.util.Date;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Leo
|
||||||
|
* @version 1.0
|
||||||
|
* @create 2024/4/29 下午3:08
|
||||||
|
* @description:
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
public class FlarumPost {
|
||||||
|
private int id;
|
||||||
|
private int discussionId;
|
||||||
|
private Integer number;
|
||||||
|
private Date createdAt;
|
||||||
|
private Integer userId;
|
||||||
|
private String type;
|
||||||
|
private String content;
|
||||||
|
private Date editedAt;
|
||||||
|
private Integer editedUserId;
|
||||||
|
private Date hiddenAt;
|
||||||
|
private Integer hiddenUserId;
|
||||||
|
private String ipAddress;
|
||||||
|
private boolean isPrivate;
|
||||||
|
}
|
||||||
21
src/main/java/cn/van333/wxsend/business/model/Msg.java
Normal file
21
src/main/java/cn/van333/wxsend/business/model/Msg.java
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package cn.van333.wxsend.business.model;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public class Msg {
|
||||||
|
private String id;
|
||||||
|
private String type;
|
||||||
|
private String tid;
|
||||||
|
private String hot;
|
||||||
|
private String title;
|
||||||
|
private String picname;
|
||||||
|
private String content;
|
||||||
|
private String count;
|
||||||
|
private String dizhi;
|
||||||
|
private String price;
|
||||||
|
private String addtime;
|
||||||
|
private String status;
|
||||||
|
private String hackpID;
|
||||||
|
private Object buygoods;
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
package cn.van333.wxsend.business.model;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
public class respoenseModel {
|
||||||
|
private String code;
|
||||||
|
private Msg msg;
|
||||||
|
}
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
package cn.van333.wxsend.business.service;
|
||||||
|
|
||||||
|
import cn.van333.wxsend.business.model.Msg;
|
||||||
|
import cn.van333.wxsend.business.model.respoenseModel;
|
||||||
|
import cn.van333.wxsend.util.QCUtil;
|
||||||
|
import com.alibaba.fastjson2.JSON;
|
||||||
|
import okhttp3.*;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static java.lang.Thread.sleep;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Leo
|
||||||
|
* @version 1.0
|
||||||
|
* @create 2024/4/29 下午5:12
|
||||||
|
* @description:
|
||||||
|
*/
|
||||||
|
@Service
|
||||||
|
public class PCService {
|
||||||
|
public static String content = "";
|
||||||
|
|
||||||
|
public void getData() throws InterruptedException {
|
||||||
|
|
||||||
|
// 6988
|
||||||
|
for (int i = 7200; i > 7000; i--) {
|
||||||
|
System.out.println("第 " + i + "次执行");
|
||||||
|
try {
|
||||||
|
MediaType mediaType = MediaType.parse("text/plain");
|
||||||
|
OkHttpClient client = new OkHttpClient().newBuilder().build();
|
||||||
|
RequestBody body = new MultipartBody.Builder().setType(MultipartBody.FORM).addFormDataPart("tid", String.valueOf(i)).build();
|
||||||
|
Request request = new Request.Builder().url("https://tm.wx.hackp.net/App/zm/getlist").method("POST", body).addHeader("User-Agent", "Apifox/1.0.0 ").build();
|
||||||
|
Response response = client.newCall(request).execute();
|
||||||
|
if (response.body() != null) {
|
||||||
|
String result = response.body().string();
|
||||||
|
System.out.println(result);
|
||||||
|
respoenseModel respoenseModel = JSON.parseObject(result, respoenseModel.class);
|
||||||
|
Msg msg = respoenseModel.getMsg();
|
||||||
|
if (QCUtil.isNotAnyEmpty(msg.getTitle(), msg.getContent(), msg.getDizhi())) {
|
||||||
|
//content = content + appendHtml(msg.getTitle(), msg.getContent(), msg.getDizhi(), i);
|
||||||
|
insertToDb(msg.getTitle(), msg.getContent(), msg.getDizhi());
|
||||||
|
//youshuju++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
//// 有50条数据再写入文件
|
||||||
|
//if (youshuju % 100 == 0 && youshuju != 0) {
|
||||||
|
// //System.out.println(youshuju);
|
||||||
|
// j++;
|
||||||
|
// //System.out.println(content);
|
||||||
|
// String fileName = "爬取的第" + j + "页 ,最后的为" + i + "条";
|
||||||
|
// long l = System.currentTimeMillis() - start;
|
||||||
|
// System.out.println("用时:" + l + "ms," + l / 1000 + "s" + "," + l / 60000 + "min");
|
||||||
|
// createHtml(fileName, content);
|
||||||
|
// //sleep(200);
|
||||||
|
// content = "";
|
||||||
|
// youshuju = 0;
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String insertToDb(String title, String content, String downloadUrl) {
|
||||||
|
content = content.replace("\\r\\n", "");
|
||||||
|
content = content.replace("<\\/span>", "");
|
||||||
|
|
||||||
|
return null;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String appendHtml(String title, String content, String downloadUrl, Integer i) {
|
||||||
|
content = content.replace("\\r\\n", "");
|
||||||
|
content = content.replace("<\\/span>", "");
|
||||||
|
content = "<span>\n" + " <h1>第" + i + " 条,标题:" + title + "</h1>\n" + "</span>" + content + "<span>\n" + " <h1>下载链接:" + downloadUrl + "</h1>\n" + "</span>";
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void createHtml(String fileName, String content) {
|
||||||
|
String finallyFileName = "D:\\pacong\\xiaochengxu2\\" + fileName + ".html";
|
||||||
|
//System.out.println("finallyFileName ******** " + finallyFileName);
|
||||||
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(finallyFileName))) {
|
||||||
|
writer.write(content);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
1561
src/main/java/cn/van333/wxsend/util/QCUtil.java
Normal file
1561
src/main/java/cn/van333/wxsend/util/QCUtil.java
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,16 +8,16 @@ spring:
|
|||||||
profiles:
|
profiles:
|
||||||
active: dev
|
active: dev
|
||||||
#数据源配置
|
#数据源配置
|
||||||
# datasource:
|
datasource:
|
||||||
# driver-class-name: com.mysql.cj.jdbc.Driver
|
driver-class-name: com.mysql.cj.jdbc.Driver
|
||||||
# url: jdbc:mysql://43.136.29.133:33306/imaotai?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
|
url: jdbc:mysql://134.175.126.60:33306/flarum_tsayij?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
|
||||||
# username: root
|
username: root
|
||||||
# password: LK.807878712
|
password: LK.807878712
|
||||||
#redis配置
|
#redis配置
|
||||||
redis:
|
redis:
|
||||||
host: 134.175.126.60
|
host: 134.175.126.60
|
||||||
port: 36379
|
port: 36379
|
||||||
database: 3
|
database: 7
|
||||||
timeout: 1800000
|
timeout: 1800000
|
||||||
lettuce:
|
lettuce:
|
||||||
pool:
|
pool:
|
||||||
|
|||||||
24
src/test/java/cn/van333/wxsend/Test001.java
Normal file
24
src/test/java/cn/van333/wxsend/Test001.java
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
package cn.van333.wxsend;
|
||||||
|
|
||||||
|
import cn.van333.wxsend.business.service.PCService;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.boot.test.context.SpringBootTest;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Leo
|
||||||
|
* @version 1.0
|
||||||
|
* @create 2024/4/29 下午5:06
|
||||||
|
* @description:
|
||||||
|
*/
|
||||||
|
@SpringBootTest
|
||||||
|
public class Test001 {
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
PCService pcService;
|
||||||
|
@Test
|
||||||
|
public void test001() throws InterruptedException {
|
||||||
|
System.out.println("test001");
|
||||||
|
pcService.getData();
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user