Positional Encoding (transformer)

The sinusoidal positional encoding is defined as:

PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math


def get_positional_encoding(max_seq_len, embed_dim):
    # max_seq_len: maximum sequence length
    # embed_dim:   dimension of the token embeddings
    # angle for position pos and dimension i: pos / 10000^(2*(i//2)/embed_dim)
    positional_encoding = np.array([
        [pos / np.power(10000, 2 * (i // 2) / embed_dim) for i in range(embed_dim)]
        for pos in range(max_seq_len)
    ])

    positional_encoding[:, 0::2] = np.sin(positional_encoding[:, 0::2])  # even dimensions: sin
    positional_encoding[:, 1::2] = np.cos(positional_encoding[:, 1::2])  # odd dimensions: cos
    return positional_encoding


positional_encoding = get_positional_encoding(100, 16)
plt.figure(figsize=(10, 10))
sns.heatmap(positional_encoding)
plt.title("Sinusoidal Function")
plt.xlabel("hidden dimension")
plt.ylabel("sequence dimension")
plt.show()

[Figure: heatmap of the sinusoidal positional encoding — x axis: hidden dimension, y axis: sequence position]

Positional Encoding sits directly above the word-embedding layer: its input is the embedding output, whose shape is (batchSize, seqLen, embedSize), so the last dimension of the positional encoding must also be embedSize. The table computed above has shape (maxSeqLen, embedSize) (dimension 0 is maxSeqLen, dimension 1 is embedSize), so before it can be added to the word embeddings an extra leading dimension has to be inserted, giving the positional encoding shape (1, maxSeqLen, embedSize); broadcasting then handles the batch dimension, as sketched below.
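As a minimal sketch of that broadcast add (the batch size, sequence length, and embedding size here are illustrative values, not from the original post), a (1, maxSeqLen, embedSize) table sliced to the current seqLen adds cleanly to a (batchSize, seqLen, embedSize) batch of embeddings:

# Illustrative shapes only: batch size 4, sequence length 10, embedding size 16.
pe_table = get_positional_encoding(100, 16)[np.newaxis, :, :]    # (1, 100, 16)
embeddings = np.random.randn(4, 10, 16)                          # (batchSize, seqLen, embedSize)
out = embeddings + pe_table[:, :embeddings.shape[1], :]          # broadcasts over the batch dimension
print(out.shape)                                                 # (4, 10, 16)

The PyTorch module below does exactly this, inserting the leading dimension with unsqueeze(0).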

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()

        # Not a parameter
        self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        ''' Sinusoid position encoding table '''
        # TODO: make it with torch instead of numpy

        def get_position_angle_vec(position):
            return [position / np.power(10000, 2 * (hid_j // 2) / d_hid) for hid_j in range(d_hid)]

        sinusoid_table = np.array([get_position_angle_vec(pos_i) for pos_i in range(n_position)])
        sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i
        sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1

        return torch.FloatTensor(sinusoid_table).unsqueeze(0)  # shape: (1, n_position, d_hid)

    def forward(self, x):  # x.shape: (batchSize, seqLen, d_hid)
        return x + self.pos_table[:, :x.size(1)].clone().detach()
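A quick usage sketch (the batch size, sequence length, and d_hid below are arbitrary values chosen for illustration, not from the original code):

pos_enc = PositionalEncoding(d_hid=16, n_position=200)
x = torch.zeros(4, 10, 16)   # (batchSize, seqLen, d_hid) dummy embeddings
out = pos_enc(x)             # adds pos_table[:, :10] via broadcasting
print(out.shape)             # torch.Size([4, 10, 16])

Because pos_table is registered as a buffer rather than a parameter, it moves with the module (e.g. on .to(device)) and is saved in the state_dict, but it is never updated by the optimizer.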
