import math

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
def get_positional_encoding(max_seq_len, embed_dim):
    """Return the standard sinusoidal positional-encoding matrix.

    Args:
        max_seq_len: maximum sequence length (number of rows).
        embed_dim: embedding dimension (number of columns).

    Returns:
        np.ndarray of shape (max_seq_len, embed_dim) where column 2i holds
        sin(pos / 10000^(2i/embed_dim)) and column 2i+1 the matching cos.
    """
    # Each (2i, 2i+1) column pair must share ONE frequency, so the exponent
    # uses i // 2 — this matches _get_sinusoid_encoding_table below. The
    # previous 2*i/embed_dim gave every single column its own frequency.
    positional_encoding = np.array([
        [pos / np.power(10000, 2 * (i // 2) / embed_dim) for i in range(embed_dim)]
        for pos in range(max_seq_len)
    ])
    # Apply sin/cos to every row, including pos 0 (cos(0) = 1 on odd dims);
    # forcing the whole first row to zero contradicted the standard formula.
    positional_encoding[:, 0::2] = np.sin(positional_encoding[:, 0::2])
    positional_encoding[:, 1::2] = np.cos(positional_encoding[:, 1::2])
    return positional_encoding
# Visualize the encoding table: rows are sequence positions, columns are
# embedding dimensions, so the alternating frequencies show up as stripes.
positional_encoding = get_positional_encoding(100, 16)
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(positional_encoding, ax=ax)
ax.set_title("Sinusoidal Function")
ax.set_xlabel("hidden dimension")
ax.set_ylabel("sequence dimension")
plt.show()
Positional encoding sits directly above the word-embedding layer: its input is the embedding output, whose shape is (batchSize, seqLen, embedSize), so the last dimension of the positional encoding must also be embedSize. Since the computed encoding table has shape (maxSeqLen, embedSize), an extra leading dimension must be added before it can be broadcast-added to the embeddings — i.e. the positional-encoding tensor must have shape (1, maxSeqLen, embedSize).
class PositionalEncoding(nn.Module):
    """Adds fixed (non-learned) sinusoidal position encodings to its input.

    The table is precomputed for ``n_position`` positions and stored as a
    buffer, so it is saved in the state dict and moves with the module
    (``.to(device)``) but is excluded from the parameter list.
    """

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()
        # Buffer, not a Parameter: persisted but never updated by training.
        self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        ''' Sinusoid position encoding table '''
        # Vectorized form of angle[pos, j] = pos / 10000^(2*(j//2)/d_hid):
        # each (2i, 2i+1) column pair shares a single frequency.
        positions = np.arange(n_position, dtype=float)[:, None]
        dims = np.arange(d_hid)
        table = positions / np.power(10000, 2 * (dims // 2) / d_hid)
        table[:, 0::2] = np.sin(table[:, 0::2])  # dim 2i
        table[:, 1::2] = np.cos(table[:, 1::2])  # dim 2i+1
        # Shape (1, n_position, d_hid): the leading axis broadcasts over batch.
        return torch.FloatTensor(table).unsqueeze(0)

    def forward(self, x):
        # x: (batchSize, seqLen, d_hid); slice the table to seqLen and add.
        return x + self.pos_table[:, :x.size(1)].clone().detach()