他的回复:
# Demo: data processing with Python (ModelArts / OBS).
"""Sample code; you can delete this file or clear its content.

The suggested execution engine is TensorFlow with Python 3.6 or higher.
"""
# Import the required packages.
import argparse
import logging
import os

import pandas as pd

# Adapt to the ModelArts runtime environment. This is explained in the help
# documentation (the "?" at the upper-right corner of the service page).
try:
    import moxing as mox

    mox.file.shift('os', 'mox')
except (ModuleNotFoundError, AttributeError):
    # Outside ModelArts the module is missing; keep running with local paths.
    mox = None
    logging.info(
        'Not exist moxing module. if running on modelarts engine. '
        'please select TensorFlow engine. Python Version is 3.6')


def argument_init():
    """Build the argument parser and parse the command line.

    Only two parameters have to be defined in the run configuration:
    ``--input_path`` (where the input data lives) and ``--output_path``
    (where the output data goes). ``--data_url`` and ``--train_url`` are
    preset by the system and need not be defined by the user.

    :return: ``(namespace, unknown_args)`` as returned by
        ``ArgumentParser.parse_known_args``
    """
    parser = argparse.ArgumentParser(description="pm data process")
    parser.add_argument('--data_url', type=str,
                        default='s3://bucket_name/folder/',
                        help='System preset parameters.')
    parser.add_argument('--train_url', type=str,
                        default='s3://bucket_name/folder/',
                        help='System preset parameters.')
    parser.add_argument('--input_path', type=str, default='',
                        help='User-defined parameter example.')
    parser.add_argument('--output_path', type=str, default='',
                        help='User-defined parameter example.')
    # parse_known_args tolerates extra arguments injected by the platform.
    return parser.parse_known_args()


def read_from_obs(file):
    """Read a CSV file and return it as a pandas data frame.

    :param file: full path of the file, including the file name (an OBS path
        when running on ModelArts)
    :return: the data frame, or ``None`` when the file does not exist
    """
    if not os.path.exists(file):
        return None
    with open(file, 'rb') as f:
        return pd.read_csv(f, encoding='utf-8')


def handle_data_yourself(data_frame):
    """Placeholder for your own data-processing logic.

    The demo performs no processing; replace the body with the logic your
    project needs.

    :param data_frame: data frame to process
    :return: the processed data frame (unchanged in this demo)
    """
    return data_frame


def write_to_obs(data_frame, output_path, file_name):
    """Write a pandas data frame as CSV to ``output_path/file_name``.

    :param data_frame: data frame to write
    :param output_path: destination directory (an OBS path on ModelArts)
    :param file_name: name of the CSV file to create
    :return: None
    """
    # An empty output_path means "current directory"; makedirs('') would raise.
    if output_path and not os.path.exists(output_path):
        os.makedirs(output_path)
    target = os.path.join(output_path, file_name)
    if mox is None:
        # No moxing available (local run): write straight to the destination
        # instead of failing on mox.file.copy.
        data_frame.to_csv(target, encoding='utf-8', index=False)
    else:
        # Write a local copy first, then upload it with moxing.
        data_frame.to_csv(file_name, encoding='utf-8', index=False)
        mox.file.copy(file_name, target)
    logging.info('success write data to obs.')


def main():
    """Read the demo file from the input path and write it to the output path."""
    # The import-time logging.info call may already have installed a default
    # WARNING-level handler, so raise the root level explicitly as well.
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    args, _unknown = argument_init()
    demo_file_name = '15_ATS_M477106162.csv'

    # Read sample: load the file into a data frame.
    source = os.path.join(args.input_path, demo_file_name)
    df = read_from_obs(source)
    if df is None:
        logging.error('Input file does not exist: %s', source)
        raise SystemExit(1)

    df = handle_data_yourself(df)

    # Write sample: store the data frame as out.csv under output_path.
    write_to_obs(df, args.output_path, 'out.csv')


if __name__ == '__main__':
    main()

# Note: this demo only reads a file from the input path and writes it back to
# the output path. In a real project you must implement the data-processing
# logic your data requires yourself.