爬虫写到论坛网站

This commit is contained in:
Leo
2024-04-29 17:52:20 +08:00
parent 0f73cd01d0
commit 082ddc5c0d
10 changed files with 1825 additions and 6 deletions

39
pom.xml
View File

@@ -77,6 +77,25 @@
<artifactId>UserAgentUtils</artifactId> <artifactId>UserAgentUtils</artifactId>
<version>1.21</version> <version>1.21</version>
</dependency> </dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>2.2.0</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.6.0</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
</dependencies> </dependencies>
<dependencyManagement> <dependencyManagement>
@@ -120,6 +139,26 @@
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-maven-plugin</artifactId>
<version>1.6.10</version>
<executions>
<execution>
<id>compile</id>
<phase>process-sources</phase>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<sourceDirs>
<source>src/main/java</source>
<source>target/generated-sources/annotations</source>
</sourceDirs>
</configuration>
</execution>
</executions>
</plugin>
</plugins> </plugins>
</build> </build>

View File

@@ -1,11 +1,13 @@
package cn.van333.wxsend; package cn.van333.wxsend;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.web.servlet.config.annotation.EnableWebMvc; import org.springframework.web.servlet.config.annotation.EnableWebMvc;
@SpringBootApplication @SpringBootApplication
@EnableWebMvc @EnableWebMvc
@MapperScan("cn.van333.wxsend.business.mapper")
public class WxSendApplication { public class WxSendApplication {
public static void main(String[] args) { public static void main(String[] args) {

View File

@@ -0,0 +1,39 @@
package cn.van333.wxsend.business.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import javax.annotation.Resource;
import java.util.Date;
/**
 * Data holder for a single forum discussion (thread).
 *
 * <p>NOTE(review): the field names look like camelCase forms of the columns of Flarum's
 * {@code discussions} table; confirm against the MyBatis mapper, which is not visible here.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString}, a
 * no-args constructor and an all-args constructor. The all-args constructor takes its
 * parameters in field declaration order, so do not reorder the fields below.
 *
 * <p>The class is a plain value object and is created with {@code new} (or by a mapper),
 * so it carries no container annotations. A class-level {@code @Resource} declares a
 * resource dependency for a managed component and has no meaning on a data holder.
 *
 * @author Leo
 * @version 1.0
 * @create 2024/4/29 3:05 PM
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class FlarumDiscussion {
    private Integer id;
    private String title;
    private int commentCount;
    private int participantCount;
    private int postNumberIndex;
    private Date createdAt;
    private Integer userId;
    private Integer firstPostId;
    private Date lastPostedAt;
    private Integer lastPostedUserId;
    private Integer lastPostId;
    private Integer lastPostNumber;
    private Date hiddenAt;
    private Integer hiddenUserId;
    private String slug;
    // Lombok naming rule for primitive boolean fields starting with "is": the generated
    // accessors are isPrivate() / setPrivate(boolean), i.e. the bean property is "private".
    // Keep this in mind when mapping or serializing by property name.
    private boolean isPrivate;
}

View File

@@ -0,0 +1,28 @@
package cn.van333.wxsend.business.model;
import lombok.Data;
import java.util.Date;
/**
* Data holder for a single forum post.
*
* <p>NOTE(review): the field names look like camelCase forms of the columns of Flarum's
* {@code posts} table; confirm against the MyBatis mapper, which is not visible here.
*
* <p>Lombok's {@code @Data} generates the accessors, {@code equals}/{@code hashCode} and
* {@code toString} for every field declared below.
*
* @author Leo
* @version 1.0
* @create 2024/4/29 3:08 PM
*/
@Data
public class FlarumPost {
// Primitive ints: these two default to 0 (not null) on a freshly constructed instance.
private int id;
private int discussionId;
private Integer number;
private Date createdAt;
private Integer userId;
private String type;
private String content;
private Date editedAt;
private Integer editedUserId;
private Date hiddenAt;
private Integer hiddenUserId;
private String ipAddress;
// Lombok naming rule for primitive boolean fields starting with "is": the generated
// accessors are isPrivate() / setPrivate(boolean), i.e. the bean property is "private".
private boolean isPrivate;
}

View File

@@ -0,0 +1,21 @@
package cn.van333.wxsend.business.model;
import lombok.Data;
/**
* Payload object carried in the {@code msg} field of {@link respoenseModel}.
*
* <p>PCService obtains instances by parsing the crawled endpoint's JSON response and reads
* {@code title}, {@code content} and {@code dizhi} from them. The meaning of the remaining
* fields is not established by the code that uses this class.
*
* <p>Lombok's {@code @Data} generates the accessors, {@code equals}/{@code hashCode} and
* {@code toString}.
*/
@Data
public class Msg {
private String id;
private String type;
private String tid;
private String hot;
private String title;
private String picname;
private String content;
private String count;
// PCService passes this value on as the "downloadUrl" argument.
private String dizhi;
private String price;
private String addtime;
private String status;
private String hackpID;
// Declared as Object, so the JSON parser decides the runtime type of this value.
private Object buygoods;
}

View File

@@ -0,0 +1,9 @@
package cn.van333.wxsend.business.model;
import lombok.Data;
/**
* Top-level shape of the JSON response parsed by PCService: a {@code code} string and a
* {@link Msg} payload.
*
* <p>NOTE(review): the class name is misspelled and does not follow UpperCamelCase, but it
* is referenced by name from PCService, so renaming it requires updating that usage too.
*/
@Data
public class respoenseModel {
private String code;
// May be absent in a response; callers should not assume it is non-null.
private Msg msg;
}

View File

@@ -0,0 +1,96 @@
package cn.van333.wxsend.business.service;
import cn.van333.wxsend.business.model.Msg;
import cn.van333.wxsend.business.model.respoenseModel;
import cn.van333.wxsend.util.QCUtil;
import com.alibaba.fastjson2.JSON;
import okhttp3.*;
import org.springframework.stereotype.Service;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import static java.lang.Thread.sleep;
/**
 * Crawler service: posts a descending range of {@code tid} values to a remote list
 * endpoint, parses each JSON response into a {@link respoenseModel} and hands complete
 * entries to {@link #insertToDb(String, String, String)}.
 *
 * <p>{@link #appendHtml(String, String, String, Integer)} and
 * {@link #createHtml(String, String)} are HTML-export helpers that {@link #getData()} does
 * not call.
 *
 * @author Leo
 * @version 1.0
 * @create 2024/4/29 5:12 PM
 */
@Service
public class PCService {

    /**
     * Public mutable buffer kept for backward compatibility. Nothing in this class reads
     * or writes it.
     */
    public static String content = "";

    /** Endpoint that returns one entry per posted {@code tid}. */
    private static final String LIST_URL = "https://tm.wx.hackp.net/App/zm/getlist";

    /** First (highest) tid requested; the crawl counts downwards from here. */
    private static final int START_TID = 7200;

    /** The crawl stops before reaching this tid (exclusive lower bound). */
    private static final int END_TID_EXCLUSIVE = 7000;

    /** Pause between two consecutive requests, in milliseconds. */
    private static final long PAUSE_MILLIS = 100;

    /**
     * One client for all requests. Each OkHttpClient owns a connection pool and a thread
     * pool, so building a new one per request wastes resources and prevents connection
     * reuse.
     */
    private final OkHttpClient client = new OkHttpClient();

    /**
     * Requests every tid from {@code START_TID} down to {@code END_TID_EXCLUSIVE + 1} and
     * stores the entries that have a title, content and download address.
     *
     * <p>A failure for one tid is printed and does not stop the crawl. The method pauses
     * after every request, whether it succeeded or not.
     *
     * @throws InterruptedException if the thread is interrupted during the pause
     */
    public void getData() throws InterruptedException {
        for (int tid = START_TID; tid > END_TID_EXCLUSIVE; tid--) {
            System.out.println("" + tid + "次执行");
            try {
                fetchAndStore(tid);
            } catch (Exception e) {
                // Best effort: one bad response must not abort the remaining requests.
                e.printStackTrace();
            } finally {
                sleep(PAUSE_MILLIS);
            }
        }
    }

    /**
     * Fetches the entry for one tid and stores it when all required fields are present.
     *
     * @param tid identifier posted to the endpoint as form field {@code tid}
     * @throws IOException if the HTTP call or reading the response body fails
     */
    private void fetchAndStore(int tid) throws IOException {
        RequestBody form = new MultipartBody.Builder()
                .setType(MultipartBody.FORM)
                .addFormDataPart("tid", String.valueOf(tid))
                .build();
        Request request = new Request.Builder()
                .url(LIST_URL)
                .method("POST", form)
                .addHeader("User-Agent", "Apifox/1.0.0 ")
                .build();

        // try-with-resources accepts a null resource, so a missing body is handled below.
        try (ResponseBody responseBody = client.newCall(request).execute().body()) {
            if (responseBody == null) {
                return;
            }
            String result = responseBody.string();
            System.out.println(result);

            respoenseModel parsed = JSON.parseObject(result, respoenseModel.class);
            Msg msg = parsed == null ? null : parsed.getMsg();
            if (msg == null) {
                // Response carried no payload for this tid; nothing to store.
                return;
            }
            if (QCUtil.isNotAnyEmpty(msg.getTitle(), msg.getContent(), msg.getDizhi())) {
                insertToDb(msg.getTitle(), msg.getContent(), msg.getDizhi());
            }
        }
    }

    /**
     * Placeholder for persisting one crawled entry.
     *
     * <p>Currently it only strips the literal character sequences {@code \r\n} and
     * {@code <\/span>} from {@code content}, discards the result and returns {@code null}.
     * Nothing is written to a database yet.
     *
     * @param title       entry title (currently unused)
     * @param content     entry body; must not be {@code null}
     * @param downloadUrl entry download address (currently unused)
     * @return always {@code null}
     */
    public String insertToDb(String title, String content, String downloadUrl) {
        content = content.replace("\\r\\n", "");
        content = content.replace("<\\/span>", "");
        return null;
    }

    /**
     * Builds the HTML fragment for one entry: a heading with the index and title, the
     * cleaned content, and a heading with the download link.
     *
     * <p>{@code title} and {@code downloadUrl} are inserted as-is, without HTML escaping.
     *
     * @param title       entry title
     * @param content     entry body; the literal sequences {@code \r\n} and
     *                    {@code <\/span>} are removed from it; must not be {@code null}
     * @param downloadUrl entry download address
     * @param i           index shown in the first heading
     * @return the assembled HTML fragment
     */
    public static String appendHtml(String title, String content, String downloadUrl, Integer i) {
        content = content.replace("\\r\\n", "");
        content = content.replace("<\\/span>", "");
        content = "<span>\n" + "    <h1>第" + i + " 条,标题:" + title + "</h1>\n" + "</span>" + content + "<span>\n" + "    <h1>下载链接:" + downloadUrl + "</h1>\n" + "</span>";
        return content;
    }

    /**
     * Writes {@code content} to {@code <fileName>.html} inside a fixed local directory.
     *
     * <p>An I/O failure is printed and not rethrown. {@link FileWriter} encodes with the
     * JVM's default charset.
     *
     * @param fileName file name without directory and without the {@code .html} suffix
     * @param content  text to write
     */
    public static void createHtml(String fileName, String content) {
        String finallyFileName = "D:\\pacong\\xiaochengxu2\\" + fileName + ".html";
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(finallyFileName))) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -8,16 +8,16 @@ spring:
profiles: profiles:
active: dev active: dev
#数据源配置 #数据源配置
# datasource: datasource:
# driver-class-name: com.mysql.cj.jdbc.Driver driver-class-name: com.mysql.cj.jdbc.Driver
# url: jdbc:mysql://43.136.29.133:33306/imaotai?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8 url: jdbc:mysql://134.175.126.60:33306/flarum_tsayij?characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
# username: root username: root
# password: LK.807878712 password: LK.807878712
#redis配置 #redis配置
redis: redis:
host: 134.175.126.60 host: 134.175.126.60
port: 36379 port: 36379
database: 3 database: 7
timeout: 1800000 timeout: 1800000
lettuce: lettuce:
pool: pool:

View File

@@ -0,0 +1,24 @@
package cn.van333.wxsend;
import cn.van333.wxsend.business.service.PCService;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
/**
* Spring Boot test that starts the application context and runs the crawl in
* {@link PCService#getData()} once.
*
* <p>The test contains no assertions; it passes as long as {@code getData()} returns
* without throwing.
*
* @author Leo
* @version 1.0
* @create 2024/4/29 5:06 PM
*/
@SpringBootTest
public class Test001 {
// Injected from the Spring test context.
@Autowired
PCService pcService;
/**
* Prints a marker line and runs the crawl.
*
* @throws InterruptedException if the crawl is interrupted while pausing between requests
*/
@Test
public void test001() throws InterruptedException {
System.out.println("test001");
pcService.getData();
}
}