[add]上传训练benchmark by z00560161

This commit is contained in:
liang_chaoming@huawei.com
2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
@@ -0,0 +1,135 @@
# VERSION: 20.0.0.RC1
# Prerequisite: place the Ascend packages, the CMake source tarball and the
# OpenMPI source tarball in the build-context directory before building.
FROM ubuntu:18.04
# Proxy URLs are deliberately NOT set with ENV: an ENV value (including any
# user:password embedded in the URL) is stored in the image configuration and
# is readable by anyone who can pull or inspect the image.
# http_proxy / https_proxy are predefined Docker build arguments, so pass them
# on the command line instead; RUN steps see them, the final image does not:
#   docker build \
#     --build-arg http_proxy=http://<user>:<password>@<proxy-host>:<port> \
#     --build-arg https_proxy=http://<user>:<password>@<proxy-host>:<port> .
# The proxy credentials that used to be committed here should be rotated.
ENV no_proxy=127.0.0.1,.huawei.com,localhost,local,.local
# Build-time parameters; each can be overridden with --build-arg.
# Ascend install locations referenced by the environment setup further down.
ARG HOST_ASCEND_BASE=/usr/local/Ascend
ARG NNAE_PATH=/usr/local/Ascend/nnae/latest
ARG TF_PLUGIN_PATH=/usr/local/Ascend/tfplugin/latest
# TensorFlow wheel, looked up in the build context.
ARG TF_PKG=tensorflow-1.15.0-cp37-cp37m-linux_aarch64.whl
# Hook and installer scripts, looked up in the build context.
ARG PREBUILD_SH=prebuild.sh
ARG INSTALL_ASCEND_PKGS_SH=install_ascend_pkgs.sh
ARG POSTBUILD_SH=postbuild.sh
# Stage the entire build context under /tmp, then put the apt and pip
# configuration files where those tools read them.
WORKDIR /tmp
COPY . /tmp/
COPY sources.list /etc/apt/
COPY pip.conf /root/.pip/
# Run the optional pre-build hook. A missing script, or a failure inside it,
# is tolerated and does not stop the build.
RUN if [ -f "$PREBUILD_SH" ]; then bash "$PREBUILD_SH" || true; fi
# System packages and benchmark runtime tools.
# `apt-get update` shares one RUN with the installs so a cached, stale package
# index can never be combined with a changed package list, and `apt-get` is
# used because the `apt` front end is not intended for scripts.
# Packages are listed one per line so a line continuation cannot glue two
# package names together.
# `make` is listed explicitly because the CMake and OpenMPI source builds
# later in this file invoke it.
# The package index is intentionally left in place: the pip bootstrap step
# that follows runs `apt-get download`, which needs it.
# The second install keeps recommended packages enabled, exactly as the
# benchmark tool installs did before.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        gfortran \
        libblas-dev \
        libblas3 \
        libffi-dev \
        libhdf5-dev \
        libicu60 \
        liblapack-dev \
        liblapack3 \
        libssl-dev \
        libxml2 \
        make \
        pkg-config \
        python3.7 \
        python3.7-dev \
        unzip && \
    apt-get install -y \
        inetutils-ping \
        iproute2 \
        net-tools \
        openssh-client \
        openssh-server \
        psmisc \
        vim \
        wget
# pip for Python 3.7.
# The unversioned https://bootstrap.pypa.io/get-pip.py only supports Python
# releases that pip still maintains, so the script published for 3.7 is used.
# `curl -f` makes an HTTP error fail the build instead of saving an error page
# as get-pip.py.
# python3-distutils is unpacked straight onto / with dpkg-deb rather than
# installed (presumably to avoid pulling in the distribution's default
# python3 as a dependency - confirm).
# The working directory is already /tmp, so no directory changes are needed.
RUN curl -fsSL https://bootstrap.pypa.io/pip/3.7/get-pip.py -o get-pip.py && \
    apt-get download python3-distutils && \
    dpkg-deb -x python3-distutils_*.deb / && \
    rm python3-distutils_*.deb && \
    python3.7 get-pip.py && \
    rm get-pip.py
# Create the HwHiAiUser group and a same-named user with a home directory at
# /home/HwHiAiUser.
RUN groupadd HwHiAiUser \
    && useradd --gid HwHiAiUser --create-home --home-dir /home/HwHiAiUser HwHiAiUser
# Python packages.
# Everything goes through pip3.7 (one entry previously used plain `pip3`), and
# --no-cache-dir keeps pip's download cache out of the image layer.
# torchvision is installed on its own because --no-deps must apply to it only.
RUN pip3.7 install --no-cache-dir \
        attrs \
        cffi==1.12.3 \
        decorator \
        grpcio \
        grpcio-tools \
        numpy \
        pathlib2 \
        Pillow==5.3.0 \
        protobuf \
        pyyaml \
        requests \
        scipy \
        sympy==1.4 \
        wheel && \
    pip3.7 install --no-cache-dir --no-deps torchvision
# Install the Ascend packages by running the installer script named by the
# INSTALL_ASCEND_PKGS_SH build argument.
RUN bash ${INSTALL_ASCEND_PKGS_SH}
# Build CMake 3.18.0 and OpenMPI 4.0.2 from the source tarballs staged in /tmp.
# Each build is a single RUN that unpacks, configures, compiles, installs and
# then deletes its source tree, so the large build directories are never
# stored in an image layer. The subshell keeps the directory change local to
# the build, so the working directory stays at the absolute path /tmp.
# `make install` creates the prefix directories itself.
WORKDIR /tmp
RUN tar -zxf cmake-3.18.0.tar.gz && \
    ( cd cmake-3.18.0 && \
      ./configure --prefix=/usr/local/cmake-3.18.0 && \
      make -j"$(nproc)" && \
      make install ) && \
    rm -rf cmake-3.18.0
# OpenMPI is installed under /usr/local/mpirun4.0.2.
RUN tar -jxf openmpi-4.0.2.tar.bz2 && \
    ( cd openmpi-4.0.2 && \
      ./configure --prefix=/usr/local/mpirun4.0.2 && \
      make -j"$(nproc)" && \
      make install ) && \
    rm -rf openmpi-4.0.2
WORKDIR /tmp
# TensorFlow installation.
# Library search path: HDF5 first, then the Ascend add-ons, NNAE framework
# and driver library directories, followed by any previously set value.
# Continuation lines must stay unindented so no whitespace ends up in the value.
ENV LD_LIBRARY_PATH=\
/usr/lib/aarch64-linux-gnu/hdf5/serial:\
${HOST_ASCEND_BASE}/add-ons:\
${NNAE_PATH}/fwkacllib/lib64:\
${HOST_ASCEND_BASE}/driver/lib64/common:\
${HOST_ASCEND_BASE}/driver/lib64/driver:${LD_LIBRARY_PATH}
# Install the TensorFlow wheel named by the TF_PKG build argument.
RUN pip3.7 install ${TF_PKG}
# Runtime environment variables.
ENV GLOG_v=2
# Locations inside the NNAE and TF plugin installs.
ENV TBE_IMPL_PATH=$NNAE_PATH/opp/op_impl/built-in/ai_core/tbe
ENV TF_PLUGIN_PKG=$TF_PLUGIN_PATH/tfplugin/python/site-packages
ENV FWK_PYTHON_PATH=$NNAE_PATH/fwkacllib/python/site-packages
ENV PATH=$NNAE_PATH/fwkacllib/ccec_compiler/bin:$PATH
ENV ASCEND_OPP_PATH=$NNAE_PATH/opp
# Python module search path. Continuation lines must stay unindented so no
# whitespace ends up in the value.
ENV PYTHONPATH=\
$FWK_PYTHON_PATH:\
$FWK_PYTHON_PATH/auto_tune.egg:\
$FWK_PYTHON_PATH/schedule_search.egg:\
$TF_PLUGIN_PKG:\
$TBE_IMPL_PATH:\
$PYTHONPATH
# OpenMPI install prefix.
ENV OPENMPI=/usr/local/mpirun4.0.2/
# Prepend the OpenMPI libraries to the existing search path. Assigning only
# $OPENMPI/lib/ here would discard the HDF5 and Ascend library directories
# that were added to LD_LIBRARY_PATH earlier in this file.
ENV LD_LIBRARY_PATH=$OPENMPI/lib/:$LD_LIBRARY_PATH
ENV PATH=$OPENMPI/bin:$PATH
# Password-less SSH login.
# Generates an RSA key pair for root with an empty passphrase, authorises that
# key for root, edits sshd_config, disables host-key checking for outgoing
# connections and sets the root password.
# NOTE(review): the private key and the root password are identical in every
# container created from this image - confirm this is acceptable outside an
# isolated benchmark network.
# NOTE(review): the two sshd_config substitutions only take effect if the file
# contains exactly the strings being searched for - confirm against the
# sshd_config shipped with the base image.
RUN ssh-keygen -t rsa -f ~/.ssh/id_rsa -P '' \
    && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys \
    && sed -i \
        -e 's/PermitEmptyPasswords yes/PermitEmptyPasswords no /' \
        -e 's/PermitRootLogin without-password/PermitRootLogin yes /' \
        /etc/ssh/sshd_config \
    && printf ' StrictHostKeyChecking no\n UserKnownHostsFile /dev/null\n' >> /etc/ssh/ssh_config \
    && echo "root:1234" | chpasswd
# Default command: start the SSH daemon, then drop into an interactive shell.
# `service` is called directly: the container runs as root and no step in this
# Dockerfile installs sudo, so a `sudo` prefix would fail with "not found" and
# sshd would never be started.
CMD ["sh", "-c", "service ssh start; bash"]
# Run the optional post-build hook, then delete the hook script.
# A missing script, or a failure inside it, is tolerated. `rm -f` keeps that
# tolerance intact: a plain `rm` would fail the build when the script is absent.
RUN bash -c "test -f $POSTBUILD_SH && bash $POSTBUILD_SH || true" && \
    rm -f "$POSTBUILD_SH"
@@ -0,0 +1,27 @@
#!/bin/bash
#--------------------------------------------------------------------------------
# VERSION: 20.0.0.RC1
# Installs the Ascend software packages (NNAE and the TensorFlow plugin).
# Write the installation steps here using bash syntax.
#
# Note: this script is NOT removed automatically when it finishes; delete it in
# postbuild.sh if it should not remain in the image.
#--------------------------------------------------------------------------------
# Stop at the first failing command. Without this the script's exit status is
# that of the final `rm`, so a failed copy or a failed installer run would go
# unnoticed and the image build would carry on without the packages.
set -e

ASCEND_NNAE=Ascend-cann-nnae_20.1.0.B030_linux-aarch64.run
ASCEND_TFPLUGIN=Ascend-fwk-tfplugin_20.1.0.B030_linux-aarch64.run

# Before building, copy the host's /etc/ascend_install.info into the build
# directory; it is placed at the same path inside the image here.
cp ascend_install.info /etc/

# Before building, copy the host's /usr/local/Ascend/driver/version.info into
# the build directory; it is placed at the same path inside the image here.
mkdir -p /usr/local/Ascend/driver/
cp version.info /usr/local/Ascend/driver/

# Install the NNAE package.
chmod +x "${ASCEND_NNAE}"
"./${ASCEND_NNAE}" --install-path=/usr/local/Ascend/ --install --quiet

# Install the TensorFlow plugin package.
chmod +x "${ASCEND_TFPLUGIN}"
"./${ASCEND_TFPLUGIN}" --install-path=/usr/local/Ascend/ --install --quiet

# The driver files were only needed so the packages above could be installed;
# remove them again. At container start the driver is mounted in by Ascend
# Docker.
rm -f version.info
rm -rf /usr/local/Ascend/driver/
@@ -0,0 +1,39 @@
#!/bin/bash
#--------------------------------------------------------------------------------
# VERSION: 20.0.0.RC1
# Post-build cleanup hook: removes installers, scripts, proxy configuration and
# anything else that should not remain in the container. Write the steps here
# using bash syntax.
# This script is executed after the main build steps have finished.
#
# Note: this script is removed automatically after it has run and does not
# remain in the image; both the script and the working directory are /tmp.
#--------------------------------------------------------------------------------
rm -f ascend_install.info
rm -f prebuild.sh
rm -f install_ascend_pkgs.sh
rm -f Dockerfile*
# -r is required for these two patterns: besides the source tarballs they also
# match the extracted cmake-* / openmpi-* source directories, which `rm -f`
# alone refuses to delete.
rm -rf cmake*
rm -rf openmpi*
rm -f Ascend-cann-nnae_20.1.0.B030_linux-aarch64.run
rm -f Ascend-fwk-tfplugin_20.1.0.B030_linux-aarch64.run
rm -f tensorflow-1.15.0-cp37-cp37m-linux_aarch64.whl
# rm -f /etc/apt/apt.conf.d/80proxy
# Replace /etc/resolv.conf with public DNS servers.
# NOTE(review): Docker manages /etc/resolv.conf for build containers, so this
# content may not persist into the final image - confirm.
tee /etc/resolv.conf <<- EOF
# This file is managed by man:systemd-resolved(8). Do not edit.
#
# This is a dynamic resolv.conf file for connecting local clients to the
# internal DNS stub resolver of systemd-resolved. This file lists all
# configured search domains.
#
# Run "systemd-resolve --status" to see details about the uplink DNS servers
# currently in use.
#
# Third party programs must not access this file directly, but only through the
# symlink at /etc/resolv.conf. To manage man:resolv.conf(5) in a different way,
# replace this symlink by a static file or a different symlink.
#
# See man:systemd-resolved.service(8) for details about the supported modes of
# operation for /etc/resolv.conf.
options edns0
nameserver 8.8.8.8
nameserver 8.8.4.4
EOF
@@ -0,0 +1,16 @@
#!/bin/bash
#--------------------------------------------------------------------------------
# VERSION: 20.0.0.RC1
# Pre-build hook: performs installation preparation such as proxy or DNS
# configuration. Write the steps here using bash syntax.
# This script is executed before the main build steps start.
#
# Note: this script is NOT removed automatically when it finishes; delete it in
# postbuild.sh if it should not remain in the image.
#--------------------------------------------------------------------------------
# DNS configuration: overwrite /etc/resolv.conf with the name server to use
# during the build. Adjust the address to match the local environment.
# NOTE(review): Docker manages /etc/resolv.conf for build containers, so this
# setting may not carry over to later build steps - confirm.
printf 'nameserver 10.72.255.100\n' | tee /etc/resolv.conf