# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import errno
import logging
import os
import re
import shutil
import tempfile
import time

import numpy as np
import paddle.fluid as fluid

from .download import get_weights_path

logger = logging.getLogger(__name__)

__all__ = [
    'load_checkpoint',
    'load_and_fusebn',
    'load_params',
    'save',
]


def is_url(path):
    """
    Whether path is a URL.

    Args:
        path (string): the path to check.
    """
    return path.startswith('http://') or path.startswith('https://')


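# Example (URL is illustrative): is_url('https://example.com/model.pdparams')
# returns True, while is_url('output/model_final') returns False.

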
def _get_weight_path(path):
    # Resolve a weights URL to a local file path, downloading it if needed.
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
        if num_trainers <= 1:
            path = get_weights_path(path)
        else:
            # In distributed training only trainer 0 downloads the weights;
            # the other trainers wait until the lock file disappears.
            from ppdet.utils.download import map_path, WEIGHTS_HOME
            weight_path = map_path(path, WEIGHTS_HOME)
            lock_path = weight_path + '.lock'
            if not os.path.exists(weight_path):
                try:
                    os.makedirs(os.path.dirname(weight_path))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                if trainer_id == 0:
                    get_weights_path(path)
                    os.remove(lock_path)
                else:
                    while os.path.exists(lock_path):
                        time.sleep(1)
            path = weight_path
    else:
        path = get_weights_path(path)
    return path


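# Example (hypothetical values): with PADDLE_TRAINERS_NUM=8 and
# PADDLE_TRAINER_ID=3, this process polls the '.lock' file until trainer 0
# finishes downloading, then returns the shared path under WEIGHTS_HOME.

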
def _load_state(path):
    if os.path.exists(path + '.pdopt'):
        # XXX another hack to ignore the optimizer state: copy only the
        # .pdparams file into a temporary directory so that
        # load_program_state never sees the .pdopt file.
        tmp = tempfile.mkdtemp()
        dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
        shutil.copy(path + '.pdparams', dst + '.pdparams')
        state = fluid.io.load_program_state(dst)
        shutil.rmtree(tmp)
    else:
        state = fluid.io.load_program_state(path)
    return state


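# Example (illustrative paths): given 'output/model_final.pdparams' and
# 'output/model_final.pdopt' on disk, _load_state('output/model_final')
# returns only the parameter arrays, keyed by variable name.

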
def _strip_postfix(path):
    path, ext = os.path.splitext(path)
    assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
        "Unknown postfix {} from weights".format(ext)
    return path


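# Example: _strip_postfix('output/model_final.pdparams') returns
# 'output/model_final'; an unrecognized extension raises an AssertionError.

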
def load_params(exe, prog, path, ignore_params=[]):
    """
    Load model from the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
        ignore_params (list): regular expressions of variables to skip
            when loading for fine-tuning. It can be specified by
            finetune_exclude_pretrained_params; for usage refer to
            docs/advanced_tutorials/TRANSFER_LEARNING.md
    """
    if is_url(path):
        path = _get_weight_path(path)

    path = _strip_postfix(path)
    if not (os.path.isdir(path) or os.path.isfile(path) or
            os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))

    logger.debug('Loading parameters from {}...'.format(path))

    ignore_set = set()
    state = _load_state(path)

    # Ignore parameters whose shape in the model mismatches the shape
    # in the pretrained weights.
    all_var_shape = {}
    for block in prog.blocks:
        for param in block.all_parameters():
            all_var_shape[param.name] = param.shape
    ignore_set.update([
        name for name, shape in all_var_shape.items()
        if name in state and shape != state[name].shape
    ])

    if ignore_params:
        all_var_names = [var.name for var in prog.list_vars()]
        ignore_list = filter(
            lambda var: any([re.match(name, var) for name in ignore_params]),
            all_var_names)
        ignore_set.update(list(ignore_list))

    if len(ignore_set) > 0:
        for k in ignore_set:
            if k in state:
                logger.warning('variable {} is ignored and not loaded'.format(k))
                del state[k]
    fluid.io.set_program_state(prog, state)


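# Example usage (a minimal sketch; the Program/Executor setup is assumed and
# the URL and pattern are illustrative):
#
#   exe = fluid.Executor(fluid.CPUPlace())
#   load_params(exe, train_prog,
#               'https://example.com/pretrained.pdparams',
#               ignore_params=['fc_.*'])  # skip FC weights when fine-tuning

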
def load_checkpoint(exe, prog, path):
    """
    Load model from the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
    """
    if is_url(path):
        path = _get_weight_path(path)

    path = _strip_postfix(path)
    if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))
    fluid.load(prog, path, executor=exe)


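# Example (illustrative path): resume from a checkpoint written by save():
#   load_checkpoint(exe, train_prog, 'output/yolov3/model_final')

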
def global_step(scope=None):
    """
    Load global step in scope.

    Args:
        scope (fluid.Scope): load global step from which scope. If None,
            from default global_scope().

    Returns:
        global step: int.
    """
    if scope is None:
        scope = fluid.global_scope()
    v = scope.find_var('@LR_DECAY_COUNTER@')
    step = np.array(v.get_tensor())[0] if v else 0
    return step


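# Note: '@LR_DECAY_COUNTER@' is the step counter maintained by fluid's
# learning rate schedulers; when no scheduler has created it, the function
# falls back to 0.

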
def save(exe, prog, path):
    """
    Save model to the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): save weight from which Program object.
        path (string): the path to save model.
    """
    if os.path.isdir(path):
        shutil.rmtree(path)
    logger.info('Save model to {}.'.format(path))
    fluid.save(prog, path)


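# Example (illustrative path): save(exe, train_prog, 'output/yolov3/model_final')
# lets fluid.save write model_final.pdparams, model_final.pdopt and
# model_final.pdmodel under output/yolov3/.

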
def load_and_fusebn(exe, prog, path):
    """
    Fuse params of batch norm to scale and bias.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
    """
    logger.debug('Load model and fuse batch norm (if present) from {}...'.
                 format(path))

    if is_url(path):
        path = _get_weight_path(path)

    if not os.path.exists(path):
        raise ValueError("Model path {} does not exist.".format(path))

    # Since the program uses affine-channel, there is no running mean and
    # variance in the program; append them here.
    # NOTE, the params of batch norm should be named like:
    #   x_scale
    #   x_offset
    #   x_mean
    #   x_variance
    # where x is any prefix.
    mean_variances = set()
    bn_vars = []
    state = _load_state(path)

    def check_mean_and_bias(prefix):
        m = prefix + 'mean'
        v = prefix + 'variance'
        return v in state and m in state

    has_mean_bias = True

    with fluid.program_guard(prog, fluid.Program()):
        for block in prog.blocks:
            ops = list(block.ops)
            if not has_mean_bias:
                break
            for op in ops:
                if op.type == 'affine_channel':
                    # strip the trailing 'scale' to recover the shared prefix
                    scale_name = op.input('Scale')[0]  # x_scale
                    bias_name = op.input('Bias')[0]  # x_offset
                    prefix = scale_name[:-5]
                    mean_name = prefix + 'mean'
                    variance_name = prefix + 'variance'
                    if not check_mean_and_bias(prefix):
                        has_mean_bias = False
                        break

                    bias = block.var(bias_name)

                    mean_vb = block.create_var(
                        name=mean_name,
                        type=bias.type,
                        shape=bias.shape,
                        dtype=bias.dtype)
                    variance_vb = block.create_var(
                        name=variance_name,
                        type=bias.type,
                        shape=bias.shape,
                        dtype=bias.dtype)

                    mean_variances.add(mean_vb)
                    mean_variances.add(variance_vb)

                    bn_vars.append(
                        [scale_name, bias_name, mean_name, variance_name])

    if not has_mean_bias:
        fluid.io.set_program_state(prog, state)
        logger.warning(
            "There are no batch norm parameters in model {}. "
            "Skipping batch norm fusion; parameters have been loaded.".format(
                path))
        return

    fluid.load(prog, path, exe)

    eps = 1e-5
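    # Why the fusion below works (a short derivation): batch norm computes
    #     y = scale * (x - mean) / sqrt(var + eps) + bias
    # while affine_channel computes
    #     y = new_scale * x + new_bias
    # so matching coefficients gives
    #     new_scale = scale / sqrt(var + eps)
    #     new_bias  = bias - mean * new_scale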
    for names in bn_vars:
        scale_name, bias_name, mean_name, var_name = names

        scale = fluid.global_scope().find_var(scale_name).get_tensor()
        bias = fluid.global_scope().find_var(bias_name).get_tensor()
        mean = fluid.global_scope().find_var(mean_name).get_tensor()
        var = fluid.global_scope().find_var(var_name).get_tensor()

        scale_arr = np.array(scale)
        bias_arr = np.array(bias)
        mean_arr = np.array(mean)
        var_arr = np.array(var)

        bn_std = np.sqrt(np.add(var_arr, eps))
        new_scale = np.float32(np.divide(scale_arr, bn_std))
        new_bias = bias_arr - mean_arr * new_scale

        # fuse to scale and bias in affine_channel
        scale.set(new_scale, exe.place)
        bias.set(new_bias, exe.place)