使用OBSFilesystem多线程删除目录

举报
南瓜星人 发表于 2022/03/25 15:02:56 2022/03/25
【摘要】 1. 多线程删除示例代码package org.example;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import jav...

1. 多线程删除示例代码

package org.example;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Command-line tool that deletes a directory through the Hadoop {@link FileSystem} API,
 * removing the directory's direct children concurrently before removing the directory itself.
 *
 * <p>Arguments, in order: {@code <ak> <sk> <endpoint> <delete-path> <threadNum>}.
 * The first three are written into the {@code fs.obs.*} configuration keys, {@code delete-path}
 * is the directory to remove (it is removed as well), and {@code threadNum} is the size of the
 * fixed thread pool used for the per-child deletes.
 */
public class MultiThreadDelete {

    /**
     * Lists the direct children of the target path, deletes each of them recursively on a
     * worker thread, waits for all of those deletes to finish and finally deletes the target
     * path itself.
     *
     * <p>A failed child delete is reported on standard output and does not abort the run.
     *
     * @param args {@code <ak> <sk> <endpoint> <delete-path> <threadNum>}
     * @throws IllegalArgumentException if fewer than five arguments are given or
     *                                  {@code threadNum} is not a positive integer
     * @throws IOException              if listing the target path or the final delete fails
     * @throws ExecutionException       if a delete task terminates with an unchecked exception
     * @throws InterruptedException     if the main thread is interrupted while waiting for tasks
     */
    public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
        // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
        if (args.length < 5) {
            throw new IllegalArgumentException(
                "usage: MultiThreadDelete <ak> <sk> <endpoint> <delete-path> <threadNum>");
        }
        String ak = args[0];
        String sk = args[1];
        String endpoint = args[2];
        String p = args[3];
        int threadNum = Integer.parseInt(args[4]);
        if (threadNum <= 0) {
            throw new IllegalArgumentException("threadNum must be a positive integer, got " + threadNum);
        }

        // get filesystem
        Configuration conf = new Configuration();
        conf.set("fs.obs.impl", "org.apache.hadoop.fs.obs.OBSFileSystem");
        conf.set("fs.obs.access.key", ak);
        conf.set("fs.obs.secret.key", sk);
        conf.set("fs.obs.endpoint", endpoint);
        // NOTE(review): presumably makes deletes bypass the trash directory - confirm against OBSA docs.
        conf.setBoolean("fs.obs.trash.enable", false);

        Path path = new Path(p);
        FileSystem fs = path.getFileSystem(conf);

        FileStatus[] fileStatuses = fs.listStatus(path);

        // get thread pool
        // The queue is unbounded on purpose: every child is submitted up front, so a capped
        // queue makes submit() throw RejectedExecutionException once the number of children
        // exceeds capacity + threadNum. The listing is already fully in memory, so the queue
        // cannot grow beyond fileStatuses.length entries.
        ThreadPoolExecutor deleteThreadPool = new ThreadPoolExecutor(threadNum, threadNum,
            1000, TimeUnit.SECONDS, new LinkedBlockingQueue<>());

        try {
            // delete the children concurrently
            List<Future<?>> futures = new ArrayList<>(fileStatuses.length);
            for (FileStatus fileStatus : fileStatuses) {
                Path deletePath = fileStatus.getPath();

                futures.add(deleteThreadPool.submit(() -> {
                    try {
                        if (!fs.delete(deletePath, true)) {
                            System.out.printf("delete [%s] failed!\n", deletePath);
                        }
                    } catch (IOException e) {
                        System.out.printf("delete [%s] failed! cause by %s\n", deletePath, e);
                    }
                }));
            }

            // Wait for every child delete before touching the base directory.
            for (Future<?> f : futures) {
                f.get();
            }
        } finally {
            // Always release the pool: its core threads are non-daemon and never time out,
            // so skipping this on an exception would keep the JVM from exiting.
            deleteThreadPool.shutdown();
        }

        if (!fs.delete(path, true)) {
            System.out.printf("delete base [%s] failed!\n", path);
        }
    }
}

2. POM依赖

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for the multithread-delete example (child of the obs-test parent). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Inherits groupId and version from the parent POM. -->
    <parent>
        <artifactId>obs-test</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>multithread-delete</artifactId>

    <!-- Compile for Java 8 source and bytecode level. -->
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- NOTE(review): presumably supplies org.apache.hadoop.fs.obs.OBSFileSystem, the class
             the example registers under fs.obs.impl; the version looks like a build for
             Hadoop 2.8.3 and should stay in step with hadoop-common below. Confirm. -->
        <dependency>
            <groupId>com.huaweicloud</groupId>
            <artifactId>hadoop-huaweicloud</artifactId>
            <version>2.8.3-hw-46</version>
        </dependency>
        <!-- Hadoop core API used by the example (Configuration, FileSystem, Path, FileStatus). -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.3</version>
        </dependency>
    </dependencies>

</project>

3. 操作步骤

  1. 将以上示例打包为jar包
  2. 使用java命令运行:java -cp XXX.jar:hadoop-common-XXX.jar:hadoop-huaweicloud-XXX.jar org.example.MultiThreadDelete <ak> <sk> <endpoint> <delete-path: example obs://xxx/xxx> <threadNum>

注意:

  1. classpath需要包含:1) 第一步打包得到的jar包;2) hadoop-common jar包;3) OBSA jar包(即hadoop-huaweicloud jar包)。
  2. delete-path为需要删除的目录(该目录本身也会被删除)。
  3. threadNum为并发线程数:程序会启动${threadNum}个线程,并发地删除${delete-path}下的直接子项(文件和子目录)。
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。