Source code for unimol_tools.utils.util

# Copyright (c) DP Technology.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from hashlib import md5


def pad_1d_tokens(
    values,
    pad_idx,
    left_pad=False,
    pad_to_length=None,
    pad_to_multiple=1,
):
    """
    Pad a list of 1d token tensors into a single 2d batch tensor.

    :param values: A non-empty list of 1d tensors.
    :param pad_idx: The padding index written to every unused position.
    :param left_pad: Whether to left pad the tensors. Defaults to False.
    :param pad_to_length: Minimum length of the padded dimension. The longest
        input still wins when it is larger. Defaults to None.
    :param pad_to_multiple: The padded length is rounded up to a multiple of
        this value. Defaults to 1.
    :raises ValueError: If ``values`` is empty.

    :return: A padded tensor of shape ``(len(values), size)`` with the same
        dtype and device as ``values[0]``.
    """
    # len() instead of truthiness so an already stacked tensor is accepted too.
    if len(values) == 0:
        raise ValueError("values must contain at least one tensor")

    size = max(v.size(0) for v in values)
    if pad_to_length is not None:
        size = max(size, pad_to_length)
    if pad_to_multiple != 1 and size % pad_to_multiple != 0:
        # Round up to the next multiple with integer arithmetic only.
        size += pad_to_multiple - size % pad_to_multiple

    res = values[0].new_full((len(values), size), pad_idx)
    for i, v in enumerate(values):
        length = len(v)
        dst = res[i, size - length:] if left_pad else res[i, :length]
        # Guards against inputs that are not 1d (element counts would differ).
        assert dst.numel() == v.numel()
        dst.copy_(v)
    return res
def pad_2d(
    values,
    pad_idx,
    dim=1,
    left_pad=False,
    pad_to_length=None,
    pad_to_multiple=1,
):
    """
    Pad a list of square 2d tensors into a single batch tensor.

    :param values: A non-empty list of tensors whose first two dimensions have
        equal length (with a trailing dimension of size ``dim`` when
        ``dim != 1``).
    :param pad_idx: The padding value written to every unused position.
    :param dim: Size of the trailing feature dimension. With the default of 1
        the output has no trailing dimension.
    :param left_pad: Whether to pad on the left side, which places each input
        in the bottom-right corner. Defaults to False.
    :param pad_to_length: Minimum side length of the padded tensors. If None,
        the maximum length in the list is used. Defaults to None.
    :param pad_to_multiple: The padded side length is rounded up to a multiple
        of this value. Defaults to 1.
    :raises ValueError: If ``values`` is empty.

    :return: A padded tensor of shape ``(len(values), size, size)`` when
        ``dim == 1``, otherwise ``(len(values), size, size, dim)``, with the
        same dtype and device as ``values[0]``.
    """
    # len() instead of truthiness so an already stacked tensor is accepted too.
    if len(values) == 0:
        raise ValueError("values must contain at least one tensor")

    size = max(v.size(0) for v in values)
    if pad_to_length is not None:
        size = max(size, pad_to_length)
    if pad_to_multiple != 1 and size % pad_to_multiple != 0:
        # Round up to the next multiple with integer arithmetic only.
        size += pad_to_multiple - size % pad_to_multiple

    batch = len(values)
    shape = (batch, size, size) if dim == 1 else (batch, size, size, dim)
    res = values[0].new_full(shape, pad_idx)

    for i, v in enumerate(values):
        n = len(v)
        if left_pad:
            dst = res[i, size - n:, size - n:]
        else:
            dst = res[i, :n, :n]
        # Guards against non-square inputs, which copy_ could otherwise
        # silently broadcast into the square destination.
        assert dst.numel() == v.numel()
        dst.copy_(v)
    return res
def pad_coords(
    values,
    pad_idx,
    dim=3,
    left_pad=False,
    pad_to_length=None,
    pad_to_multiple=1,
):
    """
    Pad a list of 2d coordinate tensors into a single 3d batch tensor.

    :param values: A non-empty list of 2d tensors of shape ``(n, dim)``.
    :param pad_idx: The value used for padding.
    :param dim: Size of the last (coordinate) dimension. Defaults to 3.
    :param left_pad: Whether to pad on the left side. Defaults to False.
    :param pad_to_length: Minimum length of the padded dimension. The longest
        input still wins when it is larger. Defaults to None.
    :param pad_to_multiple: The padded length is rounded up to a multiple of
        this value. Defaults to 1.
    :raises ValueError: If ``values`` is empty.

    :return: A padded tensor of shape ``(len(values), size, dim)`` with the
        same dtype and device as ``values[0]``.
    """
    # len() instead of truthiness so an already stacked tensor is accepted too.
    if len(values) == 0:
        raise ValueError("values must contain at least one tensor")

    size = max(v.size(0) for v in values)
    if pad_to_length is not None:
        size = max(size, pad_to_length)
    if pad_to_multiple != 1 and size % pad_to_multiple != 0:
        # Round up to the next multiple with integer arithmetic only.
        size += pad_to_multiple - size % pad_to_multiple

    res = values[0].new_full((len(values), size, dim), pad_idx)
    for i, v in enumerate(values):
        n = len(v)
        dst = res[i, size - n:, :] if left_pad else res[i, :n, :]
        # Guards against inputs whose last dimension does not match ``dim``.
        assert dst.numel() == v.numel()
        dst.copy_(v)
    return res