爬虫写到论坛网站
This commit is contained in:
39
pom.xml
39
pom.xml
@@ -77,6 +77,25 @@
|
||||
<artifactId>UserAgentUtils</artifactId>
|
||||
<version>1.21</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis.spring.boot</groupId>
|
||||
<artifactId>mybatis-spring-boot-starter</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
<version>3.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<dependencyManagement>
|
||||
@@ -120,6 +139,26 @@
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.jetbrains.kotlin</groupId>
|
||||
<artifactId>kotlin-maven-plugin</artifactId>
|
||||
<version>1.6.10</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>compile</id>
|
||||
<phase>process-sources</phase>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sourceDirs>
|
||||
<source>src/main/java</source>
|
||||
<source>target/generated-sources/annotations</source>
|
||||
</sourceDirs>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package cn.van333.wxsend;
|
||||
|
||||
import org.mybatis.spring.annotation.MapperScan;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.web.servlet.config.annotation.EnableWebMvc;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableWebMvc
|
||||
@MapperScan("cn.van333.wxsend.business.mapper")
|
||||
public class WxSendApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package cn.van333.wxsend.business.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* @author Leo
|
||||
* @version 1.0
|
||||
* @create 2024/4/29 下午3:05
|
||||
* @description:
|
||||
*/
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Resource
|
||||
public class FlarumDiscussion {
|
||||
|
||||
private Integer id;
|
||||
private String title;
|
||||
private int commentCount;
|
||||
private int participantCount;
|
||||
private int postNumberIndex;
|
||||
private Date createdAt;
|
||||
private Integer userId;
|
||||
private Integer firstPostId;
|
||||
private Date lastPostedAt;
|
||||
private Integer lastPostedUserId;
|
||||
private Integer lastPostId;
|
||||
private Integer lastPostNumber;
|
||||
private Date hiddenAt;
|
||||
private Integer hiddenUserId;
|
||||
private String slug;
|
||||
private boolean isPrivate;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package cn.van333.wxsend.business.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* @author Leo
|
||||
* @version 1.0
|
||||
* @create 2024/4/29 下午3:08
|
||||
* @description:
|
||||
*/
|
||||
@Data
|
||||
public class FlarumPost {
|
||||
private int id;
|
||||
private int discussionId;
|
||||
private Integer number;
|
||||
private Date createdAt;
|
||||
private Integer userId;
|
||||
private String type;
|
||||
private String content;
|
||||
private Date editedAt;
|
||||
private Integer editedUserId;
|
||||
private Date hiddenAt;
|
||||
private Integer hiddenUserId;
|
||||
private String ipAddress;
|
||||
private boolean isPrivate;
|
||||
}
|
||||
21
src/main/java/cn/van333/wxsend/business/model/Msg.java
Normal file
21
src/main/java/cn/van333/wxsend/business/model/Msg.java
Normal file
@@ -0,0 +1,21 @@
|
||||
package cn.van333.wxsend.business.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class Msg {
|
||||
private String id;
|
||||
private String type;
|
||||
private String tid;
|
||||
private String hot;
|
||||
private String title;
|
||||
private String picname;
|
||||
private String content;
|
||||
private String count;
|
||||
private String dizhi;
|
||||
private String price;
|
||||
private String addtime;
|
||||
private String status;
|
||||
private String hackpID;
|
||||
private Object buygoods;
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package cn.van333.wxsend.business.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class respoenseModel {
|
||||
private String code;
|
||||
private Msg msg;
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
package cn.van333.wxsend.business.service;
|
||||
|
||||
import cn.van333.wxsend.business.model.Msg;
|
||||
import cn.van333.wxsend.business.model.respoenseModel;
|
||||
import cn.van333.wxsend.util.QCUtil;
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import okhttp3.*;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
|
||||
import static java.lang.Thread.sleep;
|
||||
|
||||
/**
|
||||
* @author Leo
|
||||
* @version 1.0
|
||||
* @create 2024/4/29 下午5:12
|
||||
* @description:
|
||||
*/
|
||||
@Service
|
||||
public class PCService {
|
||||
public static String content = "";
|
||||
|
||||
public void getData() throws InterruptedException {
|
||||
|
||||
// 6988
|
||||
for (int i = 7200; i > 7000; i--) {
|
||||
System.out.println("第 " + i + "次执行");
|
||||
try {
|
||||
MediaType mediaType = MediaType.parse("text/plain");
|
||||
OkHttpClient client = new OkHttpClient().newBuilder().build();
|
||||
RequestBody body = new MultipartBody.Builder().setType(MultipartBody.FORM).addFormDataPart("tid", String.valueOf(i)).build();
|
||||
Request request = new Request.Builder().url("https://tm.wx.hackp.net/App/zm/getlist").method("POST", body).addHeader("User-Agent", "Apifox/1.0.0 ").build();
|
||||
Response response = client.newCall(request).execute();
|
||||
if (response.body() != null) {
|
||||
String result = response.body().string();
|
||||
System.out.println(result);
|
||||
respoenseModel respoenseModel = JSON.parseObject(result, respoenseModel.class);
|
||||
Msg msg = respoenseModel.getMsg();
|
||||
if (QCUtil.isNotAnyEmpty(msg.getTitle(), msg.getContent(), msg.getDizhi())) {
|
||||
//content = content + appendHtml(msg.getTitle(), msg.getContent(), msg.getDizhi(), i);
|
||||
insertToDb(msg.getTitle(), msg.getContent(), msg.getDizhi());
|
||||
//youshuju++;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
sleep(100);
|
||||
}
|
||||
|
||||
//// 有50条数据再写入文件
|
||||
//if (youshuju % 100 == 0 && youshuju != 0) {
|
||||
// //System.out.println(youshuju);
|
||||
// j++;
|
||||
// //System.out.println(content);
|
||||
// String fileName = "爬取的第" + j + "页 ,最后的为" + i + "条";
|
||||
// long l = System.currentTimeMillis() - start;
|
||||
// System.out.println("用时:" + l + "ms," + l / 1000 + "s" + "," + l / 60000 + "min");
|
||||
// createHtml(fileName, content);
|
||||
// //sleep(200);
|
||||
// content = "";
|
||||
// youshuju = 0;
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
public String insertToDb(String title, String content, String downloadUrl) {
|
||||
content = content.replace("\\r\\n", "");
|
||||
content = content.replace("<\\/span>", "");
|
||||
|
||||
return null;
|
||||
|
||||
|
||||
}
|
||||
|
||||
public static String appendHtml(String title, String content, String downloadUrl, Integer i) {
|
||||
content = content.replace("\\r\\n", "");
|
||||
content = content.replace("<\\/span>", "");
|
||||
content = "<span>\n" + " <h1>第" + i + " 条,标题:" + title + "</h1>\n" + "</span>" + content + "<span>\n" + " <h1>下载链接:" + downloadUrl + "</h1>\n" + "</span>";
|
||||
return content;
|
||||
}
|
||||
|
||||
public static void createHtml(String fileName, String content) {
|
||||
String finallyFileName = "D:\\pacong\\xiaochengxu2\\" + fileName + ".html";
|
||||
//System.out.println("finallyFileName ******** " + finallyFileName);
|
||||
try (BufferedWriter writer = new BufferedWriter(new FileWriter(finallyFileName))) {
|
||||
writer.write(content);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
1561
src/main/java/cn/van333/wxsend/util/QCUtil.java
Normal file
1561
src/main/java/cn/van333/wxsend/util/QCUtil.java
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,16 +8,16 @@ spring:
|
||||
profiles:
|
||||
active: dev
|
||||
#数据源配置
|
||||
# datasource:
|
||||
# driver-class-name: com.mysql.cj.jdbc.Driver
|
||||
# url: jdbc:mysql://43.136.29.133:33306/imaotai?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
|
||||
# username: root
|
||||
# password: LK.807878712
|
||||
datasource:
|
||||
driver-class-name: com.mysql.cj.jdbc.Driver
|
||||
url: jdbc:mysql://134.175.126.60:33306/flarum_tsayij?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
|
||||
username: root
|
||||
password: LK.807878712
|
||||
#redis配置
|
||||
redis:
|
||||
host: 134.175.126.60
|
||||
port: 36379
|
||||
database: 3
|
||||
database: 7
|
||||
timeout: 1800000
|
||||
lettuce:
|
||||
pool:
|
||||
|
||||
24
src/test/java/cn/van333/wxsend/Test001.java
Normal file
24
src/test/java/cn/van333/wxsend/Test001.java
Normal file
@@ -0,0 +1,24 @@
|
||||
package cn.van333.wxsend;
|
||||
|
||||
import cn.van333.wxsend.business.service.PCService;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
/**
|
||||
* @author Leo
|
||||
* @version 1.0
|
||||
* @create 2024/4/29 下午5:06
|
||||
* @description:
|
||||
*/
|
||||
@SpringBootTest
|
||||
public class Test001 {
|
||||
|
||||
@Autowired
|
||||
PCService pcService;
|
||||
@Test
|
||||
public void test001() throws InterruptedException {
|
||||
System.out.println("test001");
|
||||
pcService.getData();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user