使用OBSFileSystem多线程删除目录
【摘要】 1. 多线程删除示例代码package org.example;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import jav...
1. 多线程删除示例代码
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Command-line example that deletes a directory through the Hadoop {@link FileSystem} API:
 * the directory's direct children are deleted recursively in parallel on a fixed-size
 * thread pool, and the directory itself is deleted afterwards.
 *
 * <p>Usage: {@code MultiThreadDelete <ak> <sk> <endpoint> <delete-path> <threadNum>}
 */
public class MultiThreadDelete {
    /** Number of command-line arguments the tool requires. */
    private static final int REQUIRED_ARGS = 5;

    /** Maximum number of delete tasks waiting in the pool's queue. */
    private static final int QUEUE_CAPACITY = 10000;

    private static final String USAGE =
            "Usage: MultiThreadDelete <ak> <sk> <endpoint> <delete-path> <threadNum>";

    /**
     * Entry point.
     *
     * @param args {@code ak}, {@code sk}, {@code endpoint}, the path to delete
     *             (for example {@code obs://bucket/dir}) and the number of worker threads
     * @throws IllegalArgumentException if arguments are missing or {@code threadNum} is not a positive integer
     * @throws IOException if listing the directory or deleting the base directory fails
     * @throws ExecutionException if a delete task terminates with an unexpected runtime failure
     * @throws InterruptedException if the main thread is interrupted while waiting for the tasks
     */
    public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
        // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < REQUIRED_ARGS) {
            throw new IllegalArgumentException(USAGE);
        }
        String ak = args[0];
        String sk = args[1];
        String endpoint = args[2];
        String p = args[3];
        int threadNum = Integer.parseInt(args[4]);
        // Reject a bad thread count before any request is sent to the filesystem.
        if (threadNum <= 0) {
            throw new IllegalArgumentException("threadNum must be a positive integer, got " + threadNum + ". " + USAGE);
        }

        // get filesystem
        Configuration conf = new Configuration();
        conf.set("fs.obs.impl", "org.apache.hadoop.fs.obs.OBSFileSystem");
        conf.set("fs.obs.access.key", ak);
        conf.set("fs.obs.secret.key", sk);
        conf.set("fs.obs.endpoint", endpoint);
        conf.setBoolean("fs.obs.trash.enable", false);
        Path path = new Path(p);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus[] fileStatuses = fs.listStatus(path);

        // get thread pool
        // The queue is bounded; CallerRunsPolicy makes the submitting thread execute the task
        // itself when the queue is full, so directories with more children than the queue can
        // hold no longer abort with RejectedExecutionException.
        ThreadPoolExecutor deleteThreadPool = new ThreadPoolExecutor(threadNum, threadNum,
                1000, TimeUnit.SECONDS, new LinkedBlockingQueue<>(QUEUE_CAPACITY),
                new ThreadPoolExecutor.CallerRunsPolicy());
        try {
            // delete the children concurrently
            List<Future<?>> futures = new ArrayList<>(fileStatuses.length);
            for (FileStatus fileStatus : fileStatuses) {
                Path deletePath = fileStatus.getPath();
                futures.add(deleteThreadPool.submit(() -> {
                    try {
                        if (!fs.delete(deletePath, true)) {
                            System.out.printf("delete [%s] failed!\n", deletePath);
                        }
                    } catch (IOException e) {
                        // Best effort: report the failure and keep deleting the other children.
                        System.out.printf("delete [%s] failed! cause by %s\n", deletePath, e);
                    }
                }));
            }
            // Wait for every child delete to finish before touching the base directory.
            for (Future<?> f : futures) {
                f.get();
            }
        } finally {
            // Always stop the pool: its core threads are non-daemon and would otherwise keep
            // the JVM alive if submit() or get() threw.
            deleteThreadPool.shutdown();
        }

        if (!fs.delete(path, true)) {
            System.out.printf("delete base [%s] failed!\n", path);
        }
    }
}
2. POM依赖
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the multithread-delete example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- This module inherits from the org.example:obs-test:1.0-SNAPSHOT parent project. -->
<parent>
<artifactId>obs-test</artifactId>
<groupId>org.example</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>multithread-delete</artifactId>
<!-- Compile with Java 8 source and target levels. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<!-- Huawei Cloud Hadoop connector; presumably the artifact that provides
     org.apache.hadoop.fs.obs.OBSFileSystem referenced by the example (TODO confirm). -->
<dependency>
<groupId>com.huaweicloud</groupId>
<artifactId>hadoop-huaweicloud</artifactId>
<version>2.8.3-hw-46</version>
</dependency>
<!-- Hadoop common library supplying the org.apache.hadoop.conf and org.apache.hadoop.fs
     classes imported by the example. NOTE(review): its version appears to be paired with
     the 2.8.3 prefix of the connector version above; keep them aligned when upgrading. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.8.3</version>
</dependency>
</dependencies>
</project>
3. 操作步骤
- 将以上示例打包为jar包
- 使用java命令运行:java -cp XXX.jar:hadoop-common-XXX.jar:hadoop-huaweicloud-XXX.jar org.example.MultiThreadDelete <ak> <sk> <endpoint> <delete-path: example obs://xxx/xxx> <threadNum>
注意:
- classpath需要有:1). 第一步编译出的jar包,2). hadoop-common jar包,3). OBSA(即hadoop-huaweicloud)jar包。
- delete-path为需要删除的目录(该目录也会被删除)
- threadNum为并发线程数:程序会启动${threadNum}个线程,并发地删除${delete-path}下的直接子项(文件和子目录)。
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)