【愚公系列】2022年01月 MinIO文件存储服务器-对象操作(Python版)
【摘要】 什么是对象?对象是MinIO存储数据的基本单元,也被称为MinIO的文件。对象由元信息(Object Meta)、用户数据(Data)和文件名(Key)组成。对象由存储空间内部唯一的Key来标识。对象元信息是一组键值对,表示了对象的一些属性,例如最后修改时间、大小等信息,同时您也可以在元信息中存储一些自定义的信息。对象存储在存储空间(Bucket)中的。 一、对象操作 1.对象数据的获取 ...
什么是对象?
对象是MinIO存储数据的基本单元,也被称为MinIO的文件。对象由元信息(Object Meta)、用户数据(Data)和文件名(Key)组成。对象由存储空间内部唯一的Key来标识。对象元信息是一组键值对,表示了对象的一些属性,例如最后修改时间、大小等信息,同时您也可以在元信息中存储一些自定义的信息。
对象存储在存储空间(Bucket)中。
一、对象操作
1.对象数据的获取
1.1 获取对象数据(可指定版本、偏移量与长度)
# 获取对象的数据。
try:
    response = client.get_object("my-bucket", "my-object")
    # 从响应中读取数据。
finally:
    response.close()
    response.release_conn()
# 获取指定版本ID的对象的数据。
try:
    response = client.get_object(
        "my-bucket", "my-object",
        version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
    )
    # 从响应中读取数据。
finally:
    response.close()
    response.release_conn()
# 按偏移量(offset)和长度(length)获取对象的部分数据。
try:
    response = client.get_object(
        "my-bucket", "my-object", offset=512, length=1024,
    )
    # 从响应中读取数据。
finally:
    response.close()
    response.release_conn()
# 获取SSE-C加密对象的数据。
try:
    response = client.get_object(
        "my-bucket", "my-object",
        ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
    )
    # 从响应中读取数据。
finally:
    response.close()
    response.release_conn()
1.2 通过 SQL 表达式选择对象的内容
with client.select_object_content(
    "my-bucket",
    "my-object.csv",
    SelectRequest(
        "select * from S3Object",
        CSVInputSerialization(),
        CSVOutputSerialization(),
        request_progress=True,
    ),
) as result:
    for data in result.stream():
        print(data.decode())
    print(result.stats())
1.3 获取对象的对象信息和元数据
# 获取对象信息
result = client.stat_object("my-bucket", "my-object")
print(
"last-modified: {0}, size: {1}".format(
result.last_modified, result.size,
),
)
# 获取指定版本ID(version_id)的对象信息
result = client.stat_object(
"my-bucket", "my-object",
version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)
print(
"last-modified: {0}, size: {1}".format(
result.last_modified, result.size,
),
)
# 获取SSE-C加密的对象信息
result = client.stat_object(
"my-bucket", "my-object",
ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
print(
"last-modified: {0}, size: {1}".format(
result.last_modified, result.size,
),
)
2.对象下载与创建
2.1 将对象的数据下载到文件中
# 下载对象的数据
client.fget_object("my-bucket", "my-object", "my-filename")
# 下载指定版本ID的对象数据
client.fget_object(
"my-bucket", "my-object", "my-filename",
version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)
# 下载SSE-C加密对象的数据
client.fget_object(
"my-bucket", "my-object", "my-filename",
ssec=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
2.2 复制对象数据
from datetime import datetime, timezone
from minio.commonconfig import REPLACE, CopySource
# 将一个对象从一个桶复制到另一个桶
result = client.copy_object(
"my-bucket",
"my-object",
CopySource("my-sourcebucket", "my-sourceobject"),
)
print(result.object_name, result.version_id)
# 按条件复制对象(此处仅当源对象在指定时间之后被修改过时才复制)
result = client.copy_object(
"my-bucket",
"my-object",
CopySource(
"my-sourcebucket",
"my-sourceobject",
modified_since=datetime(2014, 4, 1, tzinfo=timezone.utc),
),
)
print(result.object_name, result.version_id)
# 复制对象,并用新的元数据替换源对象的元数据
metadata = {"test_meta_key": "test_meta_value"}
result = client.copy_object(
"my-bucket",
"my-object",
CopySource("my-sourcebucket", "my-sourceobject"),
metadata=metadata,
metadata_directive=REPLACE,
)
print(result.object_name, result.version_id)
2.3 通过服务器端复制组合多个源对象来创建对象
from minio.commonconfig import ComposeSource
from minio.sse import SseS3
sources = [
ComposeSource("my-job-bucket", "my-object-part-one"),
ComposeSource("my-job-bucket", "my-object-part-two"),
ComposeSource("my-job-bucket", "my-object-part-three"),
]
#通过组合源对象列表创建 my-bucket/my-object。
result = client.compose_object("my-bucket", "my-object", sources)
print(result.object_name, result.version_id)
#通过组合源对象列表创建 my-bucket/my-object,
#并为其设置用户元数据。
result = client.compose_object(
"my-bucket",
"my-object",
sources,
metadata={"test_meta_key": "test_meta_value"},
)
print(result.object_name, result.version_id)
#通过组合源对象列表创建 my-bucket/my-object,
#并启用服务器端加密(SSE-S3)。
result = client.compose_object("my-bucket", "my-object", sources, sse=SseS3())
print(result.object_name, result.version_id)
2.4 本地数据流上传到对象
# 上传数据
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传大小未知的数据
data = urlopen(
"https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.81.tar.xz",
)
result = client.put_object(
"my-bucket", "my-object", data, length=-1, part_size=10*1024*1024,
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传内容类型为application/csv的数据
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
content_type="application/csv",
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传带有元数据的数据
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
metadata={"My-Project": "one"},
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用客户提供密钥的服务器端加密(SSE-C)上传数据
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
sse=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用KMS类型的服务器端加密上载数据。
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
sse=SseKMS("KMS-KEY-ID", {"Key1": "Value1", "Key2": "Value2"}),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用S3类型的服务器端加密上传数据。
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
sse=SseS3(),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传带有标签、保留和法律封存的数据。
date = datetime.utcnow().replace(
hour=0, minute=0, second=0, microsecond=0,
) + timedelta(days=30)
tags = Tags(for_object=True)
tags["User"] = "jsmith"
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
tags=tags,
retention=Retention(GOVERNANCE, date),
legal_hold=True,
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 用进度条上传数据。
result = client.put_object(
"my-bucket", "my-object", io.BytesIO(b"hello"), 5,
progress=Progress(),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
2.5 将文件中的数据上传到存储桶中的对象
# 上传数据
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传内容类型为application/csv的数据
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
content_type="application/csv",
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传带有元数据的数据
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
metadata={"My-Project": "one"},
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用客户提供密钥的服务器端加密(SSE-C)上传数据
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
sse=SseCustomerKey(b"32byteslongsecretkeymustprovided"),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用KMS类型的服务器端加密上载数据。
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
sse=SseKMS("KMS-KEY-ID", {"Key1": "Value1", "Key2": "Value2"}),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 使用S3类型的服务器端加密上传数据。
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
sse=SseS3(),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 上传带有标签、保留和法律封存的数据。
date = datetime.utcnow().replace(
hour=0, minute=0, second=0, microsecond=0,
) + timedelta(days=30)
tags = Tags(for_object=True)
tags["User"] = "jsmith"
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
tags=tags,
retention=Retention(GOVERNANCE, date),
legal_hold=True,
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
# 用进度条上传数据
result = client.fput_object(
"my-bucket", "my-object", "my-filename",
progress=Progress(),
)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
3.对象删除
3.1 移除一个对象
# 移除对象
client.remove_object("my-bucket", "my-object")
# 删除对象的版本
client.remove_object(
"my-bucket", "my-object",
version_id="dfbd25b3-abec-4184-a4e8-5a35a5c1174d",
)
3.2 移除多个对象
# 删除对象列表
errors = client.remove_objects(
"my-bucket",
[
DeleteObject("my-object1"),
DeleteObject("my-object2"),
DeleteObject("my-object3", "13f88b18-8dcd-4c83-88f2-8631fdb6250c"),
],
)
for error in errors:
    print("error occurred when deleting object", error)
# 递归删除指定前缀下的所有对象
delete_object_list = map(
lambda x: DeleteObject(x.object_name),
client.list_objects("my-bucket", "my/prefix/", recursive=True),
)
errors = client.remove_objects("my-bucket", delete_object_list)
for error in errors:
    print("error occurred when deleting object", error)
二、对象标签配置
1.删除对象的标签配置
client.delete_object_tags("my-bucket", "my-object")
2.获取对象的标签配置
tags = client.get_object_tags("my-bucket", "my-object")
3.设置对象的标签配置
tags = Tags.new_object_tags()
tags["Project"] = "Project One"
tags["User"] = "jsmith"
client.set_object_tags("my-bucket", "my-object", tags)
三、对象合法保留(Legal Hold)与保留策略(Retention)
1.禁用对象的合法保留
client.disable_object_legal_hold("my-bucket", "my-object")
2.启用对象的合法保留
client.enable_object_legal_hold("my-bucket", "my-object")
3.判断对象的合法保留是否存在
if client.is_object_legal_hold_enabled("my-bucket", "my-object"):
    print("legal hold is enabled on my-object")
else:
    print("legal hold is not enabled on my-object")
4.获取对象的保留策略(Retention)信息
config = client.get_object_retention("my-bucket", "my-object")
5.设置对象的保留策略(Retention)信息
config = Retention(GOVERNANCE, datetime.utcnow() + timedelta(days=10))
client.set_object_retention("my-bucket", "my-object", config)
四、对象预签名 URL
1.获取用于下载对象数据的预签名 URL(可指定到期时间和自定义请求参数)
#获取用于下载 my-bucket 中 my-object 数据的预签名 URL 字符串,
#使用默认有效期(即 7 天)。
url = client.presigned_get_object("my-bucket", "my-object")
print(url)
#获取用于下载 my-bucket 中 my-object 数据的预签名 URL 字符串,
#有效期为两小时。
url = client.presigned_get_object(
"my-bucket", "my-object", expires=timedelta(hours=2),
)
print(url)
2.获取用于上传对象数据的预签名 URL(可指定到期时间和自定义请求参数)
#获取用于向 my-bucket 中的 my-object 上传数据的预签名 URL 字符串,
#使用默认有效期(即 7 天)。
url = client.presigned_put_object("my-bucket", "my-object")
print(url)
#获取用于向 my-bucket 中的 my-object 上传数据的预签名 URL 字符串,
#有效期为两小时。
url = client.presigned_put_object(
"my-bucket", "my-object", expires=timedelta(hours=2),
)
print(url)
五、对象 PostPolicy
1.获取对象 PostPolicy 的表单数据以使用 POST 方法上传其数据
policy = PostPolicy(
"my-bucket", datetime.utcnow() + timedelta(days=10),
)
policy.add_starts_with_condition("key", "my/object/prefix/")
policy.add_content_length_range_condition(
1*1024*1024, 10*1024*1024,
)
form_data = client.presigned_post_policy(policy)
六、HTTP 方法
1.按指定的 HTTP 方法、到期时间和自定义请求参数获取对象的预签名 URL
#获取用于删除 my-bucket 中 my-object 的预签名 URL 字符串,
#有效期为一天。
url = client.get_presigned_url(
"DELETE",
"my-bucket",
"my-object",
expires=timedelta(days=1),
)
print(url)
#获取用于向 my-bucket 中的 my-object 上传数据的预签名 URL 字符串,
#响应内容类型为 application/json,
#有效期为一天。
url = client.get_presigned_url(
"PUT",
"my-bucket",
"my-object",
expires=timedelta(days=1),
response_headers={"response-content-type": "application/json"},
)
print(url)
#获取用于下载 my-bucket 中 my-object 数据的预签名 URL 字符串,
#有效期为两小时。
url = client.get_presigned_url(
"GET",
"my-bucket",
"my-object",
expires=timedelta(hours=2),
)
print(url)
【版权声明】本文为华为云社区用户原创内容,转载时必须标注文章的来源(华为云社区)、文章链接、文章作者等基本信息, 否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)