java读写操作S3
场景问题:根据前缀批量查询aws S3的数据文件
1.根据前缀实现批量查询并下载数据
下面的链接是阿里云OSS(类似S3)的文档,其java API与aws的S3的java API基本类似:
http://aliyun_portal_storage.oss.aliyuncs.com/oss_api/oss_javahtml/object.html#id6
也可以参考aws自己的操作S3的官方文档:
https://docs.aws.amazon.com/zh_cn/s3/?id=docs_gateway
2.使用 AWS SDK for Java 列出键
(1)方法一:使用ListObjectsV2Request
这种方式是官方文档中使用的方式。
使用 AWS SDK for Java 列出键
https://docs.amazonaws.cn/AmazonS3/latest/dev/ListingObjectKeysUsingJava.html
package swtx.test;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.*;
import swtx.myutils.SerializableUtil;
import java.io.IOException;
import java.io.InputStream;
/**
 * Lists the objects of an AWS S3 bucket whose keys start with a given prefix and
 * downloads the content of every listed object.
 *
 * @author fangchangtan
 * @date 2019-07-25 11:54
 */
public class ListKeys {
    /** Region the S3 client is built for (placeholder, must be filled in). */
    private static final String CLIENT_REGION = "*** Client region ***";

    /** Bucket that is listed and read from (placeholder, must be filled in). */
    private static final String BUCKET_NAME = "*** Bucket name ***";

    /**
     * Key prefix used to filter the listing.
     * NOTE(review): the prefix starts with the bucket name. S3 keys are normally
     * relative to the bucket, so confirm that the stored keys really look like this.
     */
    private static final String KEY_PREFIX = BUCKET_NAME + "/upload/201907018";

    /**
     * Page size of the listing. It is kept at 2 so that the continuation-token
     * paging is actually exercised; see the note on {@link #main(String[])} about
     * choosing a larger value for real workloads.
     */
    private static final int MAX_KEYS_PER_PAGE = 2;

    /** Shared S3 client, built once from the default credentials profile. */
    private static final AmazonS3 s3Client = AmazonS3ClientBuilder.standard()
            .withCredentials(new ProfileCredentialsProvider())
            .withRegion(CLIENT_REGION)
            .build();

    /**
     * Lists every key matching {@code KEY_PREFIX} page by page and downloads the
     * content of each listed object.
     *
     * <p>The listing itself only returns keys (and sizes), not object content; the
     * content is fetched per key through {@link #getObjectByteFromS3(String, String)}.
     *
     * <p>Note from the original author: the page size passed to {@code withMaxKeys}
     * has a large effect on listing performance. With 161 objects in the bucket,
     * page sizes of 1 and 200 differed by a factor of more than 20, so a larger
     * page size is recommended.
     *
     * @param args unused
     * @throws IOException declared for compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        try {
            System.out.println("Listing objects");
            ListObjectsV2Request req = new ListObjectsV2Request()
                    .withBucketName(BUCKET_NAME)
                    .withPrefix(KEY_PREFIX)
                    .withDelimiter("/")
                    .withMaxKeys(MAX_KEYS_PER_PAGE);
            ListObjectsV2Result result;
            do {
                result = s3Client.listObjectsV2(req);
                for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
                    System.out.printf(" - %s (size: %d)\n", objectSummary.getKey(), objectSummary.getSize());
                    // Fetch the object with the bucket that was listed and the full key the
                    // listing returned, instead of a hard-coded bucket and a key rebuilt
                    // from the last path segment.
                    byte[] objectByteFromS3 = getObjectByteFromS3(BUCKET_NAME, objectSummary.getKey());
                    // The downloaded bytes are the end result of this example; nothing
                    // further is done with them here.
                }
                // If the listing was truncated, hand the continuation token back to the
                // request so the next call returns the following page.
                String token = result.getNextContinuationToken();
                System.out.println("Next Continuation Token: " + token);
                req.setContinuationToken(token);
            } while (result.isTruncated());
        } catch (AmazonServiceException e) {
            // The call was transmitted successfully, but Amazon S3 couldn't process
            // it, so it returned an error response.
            e.printStackTrace();
        } catch (SdkClientException e) {
            // Amazon S3 couldn't be contacted for a response, or the client
            // couldn't parse the response from Amazon S3.
            e.printStackTrace();
        }
    }

    /**
     * Downloads one object from S3 and returns its content as a byte array.
     *
     * <p>The {@link S3Object} is closed before this method returns, which releases
     * the underlying HTTP connection.
     *
     * @param bucketName name of the bucket that holds the object
     * @param key full key of the object inside the bucket
     * @return the bytes produced by {@code SerializableUtil.convertInStream2Byte} for the
     *         object's content stream (presumably the complete content; confirm against
     *         that helper), or {@code null} when the client returns no object
     * @throws SdkClientException if the request fails or the object cannot be closed
     */
    public static byte[] getObjectByteFromS3(String bucketName, String key) {
        try (S3Object object = s3Client.getObject(new GetObjectRequest(bucketName, key))) {
            if (object == null) {
                return null;
            }
            InputStream inputStream = object.getObjectContent();
            return SerializableUtil.convertInStream2Byte(inputStream);
        } catch (IOException e) {
            // Only close() can raise IOException here; report it in the SDK's own
            // unchecked exception family so the method signature stays unchanged.
            throw new SdkClientException(
                    "Failed to close S3 object " + bucketName + "/" + key, e);
        }
    }
}
实际s3的查找似乎只能使用前缀的方式首先过滤出key,拿到key之后再去S3中找出相应的value数据文件,并下载下来
(2)方法二
另外,使用另一个java api(ListObjectsRequest)也是可以的,效果同上,如下:
/**
* 使用listObjectsRequest累出所有符合前缀prefix的key
*/
public static void listKeyS3Test3() {
ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
.withBucketName(ConstantInfo.my_bucket)
.withPrefix(ConstantInfo.dirPath_prefix)
// .withMarker(ConstantInfo.obj_marker)
.withDelimiter(ConstantInfo.obj_delimiter)
.withMaxKeys(ConstantInfo.obj_max_key);
int count3 = 0;
ObjectListing objectListing = s3Client.listObjects(listObjectsRequest);
do {
for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
String key = objectSummary.getKey();
System.out.println("key:" +key)
count3++;
}
objectListing = s3Client.listNextBatchOfObjects(objectListing);
} while (objectListing.isTruncated());
System.out.println("count2: "+count3);
}
具体的java API还可以参考如下:
s3 amazon常用接口: http://www.voidcn.com/article/p-yyvypoad-zv.html