from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
"""
The following methods are various utility methods for using the Tensor-Train
decomposition, or TT-decomposition, introduced by I. V. Oseledets (2011) in his
paper (http://epubs.siam.org/doi/abs/10.1137/090752286).

Broadly speaking, these methods are used to replace fully connected layers in
neural networks with Tensor-Train layers introduced by A. Novikov et al. (2015)
in their paper (http://arxiv.org/abs/1509.06569). More details about each of
the methods are provided in each respective docstring.
"""


def init_tt_cores(inp_sizes, out_sizes, tt_ranks, seed=1234):
    """
    Initialize randomized orthogonalized TT-cores.

    This method should be used when a TT-layer is trained from scratch. The
    sizes of each of the cores are specified by the inp_sizes and out_sizes,
    and the respective tt_ranks will dictate the ranks of each of the cores.
    Note that a larger set of tt_ranks will result in slower computation but
    will result in more accurate approximations. The size of the ith core is:

        tt_ranks[i] * inp_sizes[i] * out_sizes[i] * tt_ranks[i + 1].

    Note that the following relationship between the lengths of the inputs is
    expected:

        len(inp_sizes) == len(out_sizes) == len(tt_ranks) - 1.

    Args:
        inp_sizes: list of the input dimensions of the respective cores
        out_sizes: list of the output dimensions of the respective cores
        tt_ranks: list of the ranks of the respective cores
        seed: integer to seed the random number generator

    Returns:
        cores: one-dimensional array of cores concatenated along an axis
    """
    np.random.seed(seed)

    # Assert that the lengths of the inputs are consistent.
    assert(len(inp_sizes) == len(out_sizes)), \
50 "The number of input dimensions (" + str(len(inp_sizes)) + \
51 ") must be equal to the number of output dimensions (" + \
52 str(len(out_sizes)) +
")." 54 assert(len(tt_ranks) == len(inp_sizes) + 1), \
55 "The number of tt-ranks (" + str(len(tt_ranks)) +
") must be " + \
56 "one more than the number of input and output dims (" + \
57 str(len(out_sizes)) +
")." 60 inp_sizes = np.array(inp_sizes)
    out_sizes = np.array(out_sizes)
    tt_ranks = np.array(tt_ranks)
    # Initialize the flat cores array; its length is the total number of
    # elements across all cores.
    cores_len = np.sum(
        inp_sizes * out_sizes * tt_ranks[1:] * tt_ranks[:-1])
    cores = np.zeros(cores_len)
    cores_idx = 0
    rv = 1
    # Compute the full list of cores by computing each individual one.
    for i in range(inp_sizes.shape[0]):
        shape = [tt_ranks[i],
                 inp_sizes[i],
                 out_sizes[i],
                 tt_ranks[i + 1]]

        # Precompute the shape of the current core flattened into a tall
        # matrix, which is the shape expected by the QR decomposition below.
        tall_shape = (np.prod(shape[:3]), shape[3])
        # Randomly initialize the current core using a normal distribution,
        # carrying over the R factor from the previous core's QR step.
        curr_core = np.dot(rv, np.random.normal(
            0, 1, size=(shape[0], np.prod(shape[1:]))))
        curr_core = curr_core.reshape(tall_shape)
        # Orthogonalize the current core via QR (except for the last core)
        # and append its flattened entries to the cores array.
        if i < inp_sizes.shape[0] - 1:
            curr_core, rv = np.linalg.qr(curr_core)
        cores[cores_idx:cores_idx + curr_core.size] = curr_core.flatten()
        cores_idx += curr_core.size
    # Normalize the cores with a Glorot-style scaling factor.
    glorot_style = (np.prod(inp_sizes) *
                    np.prod(tt_ranks))**(1.0 / inp_sizes.shape[0])

    return (0.1 / glorot_style) * np.array(cores).astype(np.float32)
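
# Example usage (illustrative values): initialize cores for a TT-layer that
# factors a 784 -> 625 fully connected layer. Any factorization of the two
# dimensions works; the ranks below are an arbitrary example choice.
#
#   cores = init_tt_cores([4, 7, 4, 7], [5, 5, 5, 5], [1, 3, 3, 3, 1])
#   # cores is a flat float32 array holding all four cores back to back, of
#   # length sum(tt_ranks[i] * inp_sizes[i] * out_sizes[i] * tt_ranks[i + 1]).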


def matrix_to_tt(W, inp_sizes, out_sizes, tt_ranks):
    """
    Convert a matrix into the TT-format.

    This method will consume a 2D weight matrix such as those used in fully
    connected layers in a neural network and will compute the TT-decomposition
    of the weight matrix and return the TT-cores of the resulting computation.
    This method should be used when converting a trained fully connected
    layer into a TT-layer for increased speed and decreased parameter size.
    The size of the ith core is:

        tt_ranks[i] * inp_sizes[i] * out_sizes[i] * tt_ranks[i + 1].

    Note that the following relationship between the lengths of the inputs is
    expected:

        len(inp_sizes) == len(out_sizes) == len(tt_ranks) - 1.

    We also require that np.prod(inp_sizes) == W.shape[0] and that
    np.prod(out_sizes) == W.shape[1].

    Args:
        W: two-dimensional weight matrix numpy array representing a fully
            connected layer to be converted to TT-format; note that the weight
            matrix is transposed before being decomposed because we want to
            emulate the X * W^T operation that the FC layer performs.
        inp_sizes: list of the input dimensions of the respective cores
        out_sizes: list of the output dimensions of the respective cores
        tt_ranks: list of the ranks of the respective cores

    Returns:
        new_cores: one-dimensional array of cores concatenated along an axis
    """

    # Assert that the lengths and shapes of the inputs are consistent.
    assert(len(inp_sizes) == len(out_sizes)), \
135 "The number of input dimensions (" + str(len(inp_sizes)) + \
136 ") must be equal to the number of output dimensions (" + \
137 str(len(out_sizes)) +
")." 139 assert(len(tt_ranks) == len(inp_sizes) + 1), \
140 "The number of tt-ranks (" + str(len(tt_ranks)) +
") must be " + \
141 "one more than the number of input and output dimensions (" + \
142 str(len(out_sizes)) +
")." 144 assert(W.shape[0] == np.prod(inp_sizes)), \
145 "The product of the input sizes (" + str(np.prod(inp_sizes)) + \
146 ") must be equal to first dimension of W (" + str(W.shape[0]) +
")." 148 assert(W.shape[1] == np.prod(out_sizes)), \
149 "The product of the output sizes (" + str(np.prod(out_sizes)) + \
150 ") must be equal to second dimension of W (" + str(W.shape[1]) +
")." 157 inp_sizes = np.array(inp_sizes)
    out_sizes = np.array(out_sizes)
    tt_ranks = np.array(tt_ranks)
    # Copy the weight matrix so that the original is not mutated by the
    # permute and reshape steps below.
    W_copy = W.copy()
    total_inp_size = inp_sizes.size
    # Reshape W into a tensor with one axis per input and output dimension,
    # then interleave the input and output axes so that each input axis is
    # adjacent to its matching output axis, and finally merge each pair into
    # a single axis of size inp_sizes[i] * out_sizes[i].
    W_copy = np.reshape(W_copy, np.concatenate((inp_sizes, out_sizes)))
    order = np.repeat(np.arange(0, total_inp_size), 2) + \
        np.tile([0, total_inp_size], total_inp_size)
    W_copy = np.transpose(W_copy, axes=order)
    W_copy = np.reshape(W_copy, inp_sizes * out_sizes)
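
    # As a concrete illustration (example values): with total_inp_size == 3
    # the permutation above evaluates to
    #
    #   np.repeat(np.arange(0, 3), 2)  # -> [0, 0, 1, 1, 2, 2]
    #   np.tile([0, 3], 3)             # -> [0, 3, 0, 3, 0, 3]
    #   order                          # -> [0, 3, 1, 4, 2, 5]
    #
    # i.e. input axis i is interleaved with its matching output axis i + 3.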
    # Use the tt_svd helper method to convert the permuted and reshaped
    # matrix into the preliminary cores array.
    cores = tt_svd(W_copy, inp_sizes * out_sizes, tt_ranks)
    # Permute the dimensions of each of the cores to be compatible with the
    # TT-layer, writing them out in reverse order.
    new_cores = np.zeros(cores.shape).astype(np.float32)
    idx = 0
    for i in range(len(inp_sizes)):
        shape = (tt_ranks[i], inp_sizes[i], out_sizes[i], tt_ranks[i + 1])
        current_core = cores[idx:idx + np.prod(shape)].reshape(shape)
        current_core = current_core.transpose((1, 3, 0, 2))
        new_cores[new_cores.shape[0] - idx - np.prod(shape):
                  new_cores.shape[0] - idx] \
            = current_core.flatten()
        idx += np.prod(shape)

    return new_cores
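
# Example usage (illustrative values): convert a trained 784 x 625 fully
# connected weight matrix into TT-format. The factorizations and ranks are
# example choices; np.prod(inp_sizes) and np.prod(out_sizes) must equal
# W.shape[0] and W.shape[1], respectively.
#
#   W = np.random.randn(784, 625).astype(np.float32)
#   new_cores = matrix_to_tt(W, [4, 7, 4, 7], [5, 5, 5, 5], [1, 3, 3, 3, 1])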


def tt_svd(W, sizes, tt_ranks):
    """
    Helper method for the matrix_to_tt() method performing the TT-SVD
    decomposition.

    Uses the TT-decomposition algorithm to convert a matrix to TT-format using
    multiple reduced SVD operations.

    Args:
        W: two-dimensional weight matrix representing a fully connected layer
            to be converted to TT-format, preprocessed by the matrix_to_tt()
            method.
        sizes: list of the dimensions of each of the cores
        tt_ranks: list of the ranks of the respective cores

    Returns:
        cores: one-dimensional array of cores concatenated along an axis
    """
    assert(len(tt_ranks) == len(sizes) + 1)
    total_size = sizes.size
    core = np.zeros(np.sum(tt_ranks[:-1] * sizes * tt_ranks[1:]),
                    dtype='float32')

    # Compute the full list of cores, peeling one core off the remainder
    # matrix C with each truncated SVD.
    C = W.copy()
    pos = 0
    for i in range(0, total_size - 1):
        shape = tt_ranks[i] * sizes[i]

        # Split the current remainder matrix with a reduced SVD truncated to
        # the next TT-rank.
        C = np.reshape(C, [shape, -1])
        U, S, V = np.linalg.svd(C, full_matrices=False)
        U = U[:, 0:tt_ranks[i + 1]]
        S = S[0:tt_ranks[i + 1]]
        V = V[0:tt_ranks[i + 1], :]
        # Store the truncated U as the current core; fold S * V into the
        # remainder for the next iteration.
        core[pos:pos + tt_ranks[i] * sizes[i] * tt_ranks[i + 1]] = U.ravel()
        pos += tt_ranks[i] * sizes[i] * tt_ranks[i + 1]
        C = np.dot(np.diag(S), V)
    # Whatever remains after the final SVD is the last core.
    core[pos:pos + tt_ranks[total_size - 1] *
         sizes[total_size - 1] * tt_ranks[total_size]] = C.ravel()

    return core
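
# Illustrative check (example values): tt_svd consumes the permuted and
# reshaped matrix built inside matrix_to_tt, where sizes is the elementwise
# product inp_sizes * out_sizes.
#
#   sizes = np.array([20, 35, 20, 35])    # e.g. [4, 7, 4, 7] * [5, 5, 5, 5]
#   tt_ranks = np.array([1, 3, 3, 3, 1])
#   flat_cores = tt_svd(W_copy, sizes, tt_ranks)  # W_copy as prepared above
#   # flat_cores stores each core, of size
#   # tt_ranks[i] * sizes[i] * tt_ranks[i + 1], back to back in one vector.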


def fc_net_to_tt_net(net):