import torch

PyTorch Tensor Operations

Elementwise operations

If you operate with a scalar, PyTorch broadcasts it and applies the operation to every element of the tensor.

# Addition
A = torch.rand(size=(3,3))
print(A)
print(A + torch.ones(size=(3,3)))
print(A + 1) # this functionally does the same thing as above!! broadcasts

Output:

tensor([[0.2339, 0.7098, 0.6142],
        [0.0381, 0.8267, 0.1002],
        [0.7466, 0.1895, 0.2673]])
tensor([[1.2339, 1.7098, 1.6142],
        [1.0381, 1.8267, 1.1002],
        [1.7466, 1.1895, 1.2673]])
tensor([[1.2339, 1.7098, 1.6142],
        [1.0381, 1.8267, 1.1002],
        [1.7466, 1.1895, 1.2673]])
# Subtraction
B = torch.rand(size=(3,3))
print(B)
print(B - 10)

Output:

tensor([[0.6257, 0.0488, 0.1315],
        [0.3066, 0.1508, 0.1718],
        [0.8165, 0.6892, 0.5471]])
tensor([[-9.3743, -9.9512, -9.8685],
        [-9.6934, -9.8492, -9.8282],
        [-9.1835, -9.3108, -9.4529]])
# Multiplication
C = torch.rand(size=(2,4))
print(C)
print(C * 0)

Output:

tensor([[0.1088, 0.0498, 0.1934, 0.1715],
        [0.5359, 0.0798, 0.7088, 0.4292]])
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.]])
# Division
D = torch.rand(size=(9,9))
print(D)
print(D / 10)

Output:

tensor([[0.2233, 0.5975, 0.7809, 0.4628, 0.5885, 0.3520, 0.2315, 0.7225, 0.3431],
        [0.6590, 0.3595, 0.5055, 0.3556, 0.0775, 0.1074, 0.0194, 0.4781, 0.5521],
        [0.6431, 0.9693, 0.8376, 0.8272, 0.6010, 0.2416, 0.0118, 0.1930, 0.3059],
        [0.5878, 0.8586, 0.7320, 0.8026, 0.4704, 0.9836, 0.9053, 0.3315, 0.7002],
        [0.1529, 0.6839, 0.4797, 0.3638, 0.4218, 0.5437, 0.5274, 0.4355, 0.0865],
        [0.2732, 0.0304, 0.9109, 0.9768, 0.4425, 0.2580, 0.4441, 0.6971, 0.9129],
        [0.8907, 0.1630, 0.9689, 0.1476, 0.7911, 0.9523, 0.8729, 0.2922, 0.4187],
        [0.0350, 0.1326, 0.6774, 0.5658, 0.4687, 0.0396, 0.7862, 0.5471, 0.4041],
        [0.3690, 0.5235, 0.5344, 0.5752, 0.6925, 0.4329, 0.3679, 0.7114, 0.3887]])
tensor([[0.0223, 0.0597, 0.0781, 0.0463, 0.0588, 0.0352, 0.0232, 0.0722, 0.0343],
        [0.0659, 0.0360, 0.0505, 0.0356, 0.0077, 0.0107, 0.0019, 0.0478, 0.0552],
        [0.0643, 0.0969, 0.0838, 0.0827, 0.0601, 0.0242, 0.0012, 0.0193, 0.0306],
        [0.0588, 0.0859, 0.0732, 0.0803, 0.0470, 0.0984, 0.0905, 0.0331, 0.0700],
        [0.0153, 0.0684, 0.0480, 0.0364, 0.0422, 0.0544, 0.0527, 0.0436, 0.0087],
        [0.0273, 0.0030, 0.0911, 0.0977, 0.0442, 0.0258, 0.0444, 0.0697, 0.0913],
        [0.0891, 0.0163, 0.0969, 0.0148, 0.0791, 0.0952, 0.0873, 0.0292, 0.0419],
        [0.0035, 0.0133, 0.0677, 0.0566, 0.0469, 0.0040, 0.0786, 0.0547, 0.0404],
        [0.0369, 0.0523, 0.0534, 0.0575, 0.0692, 0.0433, 0.0368, 0.0711, 0.0389]])
# torch.diff computes, for each element, the difference between the next element and the current one
a = torch.arange(10)
# to keep the output the same length as the input, use prepend or append (must be a compatible shape)
a, torch.diff(a), torch.diff(a, prepend=torch.zeros(1, dtype=torch.long))

Output:

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([1, 1, 1, 1, 1, 1, 1, 1, 1]),
 tensor([0, 1, 1, 1, 1, 1, 1, 1, 1, 1]))
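The same length-preserving trick works with append, which pads the differences on the right instead; a quick sketch (reusing a from above, the appended value 100 is arbitrary):

a = torch.arange(10)
# the appended value contributes the last difference: 100 - 9 = 91
torch.diff(a, append=torch.tensor([100]))
# -> tensor([ 1,  1,  1,  1,  1,  1,  1,  1,  1, 91])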

Matrix Multiplication

The most common operation. The inner dimensions must match, and the result takes the outer dimensions:

(N, M) @ (M, X) -> (N, X)

a = torch.tensor([4, 5, 6])
b = torch.tensor([1, 2, 3])
 
# Elementwise multiplication
print(a * b)
 
# Matrix multiplication (for two 1D tensors this is the dot/inner product)
print(a @ b)
print(torch.matmul(a,b))
 
# For matrix multiplication, the inner dimensions must match!
A = torch.tensor([[1, 2, 3], [4, 5, 6]]) # (2, 3)
B = torch.tensor([[3, 4, 5], [6, 7, 8]]) # (2, 3)
 
print(A @ B.T) # (2, 3) @ (3, 2) -> (2, 2)
print(torch.mm(A, B.T))
print(torch.mm(B.T, A)) # (3, 2) @ (2, 3) -> (3, 3)

Output:

tensor([ 4, 10, 18])
tensor(32)
tensor(32)
tensor([[ 26,  44],
        [ 62, 107]])
tensor([[ 26,  44],
        [ 62, 107]])
tensor([[27, 36, 45],
        [32, 43, 54],
        [37, 50, 63]])
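To see the shape rule fail, here is a minimal sketch multiplying the two (2, 3) matrices directly; the inner dimensions (3 and 2) don't line up, so PyTorch raises an error (the exact message varies by version):

try:
    A @ B  # (2, 3) @ (2, 3): inner dimensions 3 and 2 don't match
except RuntimeError as e:
    print(e)  # e.g. "mat1 and mat2 shapes cannot be multiplied (2x3 and 2x3)"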
# Linear Layer
# y = Ax + b
 
torch.manual_seed(42)
 
linear_layer = torch.nn.Linear(in_features=3, out_features=6)
 
x = torch.tensor([[1, 2, 1], [1, 2, 1]], dtype=torch.float32)
 
output = linear_layer(x)
output

Output:

tensor([[0.9950, 0.5410, 0.6400, 0.6204, 0.6268, 1.2770],
        [0.9950, 0.5410, 0.6400, 0.6204, 0.6268, 1.2770]],
       grad_fn=<AddmmBackward0>)
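To make the y = Ax + b connection concrete, here is a small sketch reproducing the layer's output by hand (reusing linear_layer, x and output from above; nn.Linear stores A as .weight with shape (out_features, in_features) and b as .bias):

manual = x @ linear_layer.weight.T + linear_layer.bias
print(torch.allclose(manual, output))  # True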
import torch
# Inner product: for 1D tensors .T is a no-op, so a @ b is the dot product;
# unsqueezing to (10, 1) @ (1, 10) gives the outer product, same as torch.outer
a = torch.rand(10)
b = torch.rand(10)
 
a.T@b, b@a.T, (a.unsqueeze(1)@b.unsqueeze(0)).shape, torch.outer(a,b).shape

Output:

(tensor(2.7346), tensor(2.7346), torch.Size([10, 10]), torch.Size([10, 10]))

Reshaping, Stacking, Squeezing and Unsqueezing, and other “shifters”

Method and one-line description:

  • torch.reshape(input, shape): reshapes input to shape (if compatible); torch.Tensor.reshape() also works.
  • Tensor.view(shape): returns a view of the original tensor in a different shape but shares the same data as the original tensor.
  • torch.stack(tensors, dim=0): concatenates a sequence of tensors along a new dimension (dim); all tensors must be the same size.
  • torch.squeeze(input): squeezes input to remove all the dimensions with value 1.
  • torch.unsqueeze(input, dim): returns input with a dimension of value 1 added at dim.
  • torch.permute(input, dims): returns a view of the original input with its dimensions permuted (rearranged) to dims.

There are also others, like torch.roll, which shifts elements in a circular fashion within the tensor.

And torch.flip, which flips the tensor across a set of dimensions.

x = torch.arange(-100, 100, 2)
X = x.reshape([10, 10])
X_unsqueezed = torch.unsqueeze(X, 2)
print(X_unsqueezed.shape)
 
X_squeezed = torch.squeeze(X_unsqueezed)
print(X_squeezed.shape)
 
X_2 = X.view([10, 10])
X_stacked = torch.stack([X_2, X], dim=0)
print(X_stacked.shape)
 
X_permute = torch.permute(X_stacked, (1, 0, 2))
print(X_permute.shape)

Output:

torch.Size([10, 10, 1])
torch.Size([10, 10])
torch.Size([2, 10, 10])
torch.Size([10, 2, 10])

IMPORTANT: reshape returns a view when the memory layout allows it and otherwise copies the data, while permute always returns a view of the original tensor. A view does not own its data; it shares the original tensor's storage.
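A quick sketch of what "does not own" means in practice: writing through a view also changes the original tensor, because both point at the same underlying storage.

x = torch.arange(6)
v = x.view(2, 3)  # a view: same storage, different shape
v[0, 0] = 100
print(x)  # tensor([100,   1,   2,   3,   4,   5]) -- the original changed too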

X = torch.rand(size=(50, 10, 3))
X_view = X[:30:2, :8, 2:] # slicing works the same as in numpy: start:stop:step
X_view.shape
 

Output:

torch.Size([15, 8, 1])
# torch.roll shifts the elements to the right in a circular fashion; a negative shift shifts them to the left
A = torch.arange(10)
A, A.roll(shifts=1), A.roll(shifts=-1)

Output:

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([9, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
 tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]))
# Torch Flip
A = torch.arange(16).reshape(2, 2, 4)
A, A.flip(dims=[0,2])

Output:

(tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7]],
 
         [[ 8,  9, 10, 11],
          [12, 13, 14, 15]]]),
 tensor([[[11, 10,  9,  8],
          [15, 14, 13, 12]],
 
         [[ 3,  2,  1,  0],
          [ 7,  6,  5,  4]]]))

Diagonals and Eye

import torch 
 
# use torch.diag to get the diagonal as a vector (also works with non-square matrices)
a = torch.rand(3,4)
torch.diag(a)
a.diag()

Output:

tensor([0.3291, 0.9518, 0.7917])
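torch.diag also works the other way around: given a 1D tensor, it builds a square matrix with that vector on the diagonal. A quick sketch:

v = torch.tensor([1., 2., 3.])
torch.diag(v)
# -> tensor([[1., 0., 0.],
#            [0., 2., 0.],
#            [0., 0., 3.]])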
# use torch.eye to get an identity matrix; one argument makes a square identity matrix, two arguments make a rectangular matrix with ones on the main diagonal
b = torch.eye(10)
c = torch.eye(3, 10)
c

Output:

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]])
# torch.triu zeroes out the lower half of a matrix, below the diagonal (values on the diagonal are kept)
A = torch.ones(8,8)
torch.triu(A) # Both work
A.triu() # both work

Output:

tensor([[1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 1.]])
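triu also takes a diagonal argument; a small sketch with diagonal=1, which starts one above the main diagonal so the diagonal itself is zeroed out:

A = torch.ones(4, 4)
A.triu(diagonal=1)
# -> tensor([[0., 1., 1., 1.],
#            [0., 0., 1., 1.],
#            [0., 0., 0., 1.],
#            [0., 0., 0., 0.]])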

Bincount

bincount returns a tensor that counts the occurrences of each value in the input (the index is the value, the entry is its count). It only works on 1D tensors of non-negative integers.

A = torch.arange(10)
print(A.bincount())
 
A = torch.tensor([3, 3, 4, 1, 1, 1, 1, 0, 5])
# minlength defines the minimum number of bins to have
A.bincount(minlength=20), torch.bincount(A, minlength=20)

Output:

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

Output:

(tensor([1, 4, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([1, 4, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
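bincount also accepts an optional weights tensor of the same length as the input; instead of counting, each bin accumulates the weights of the values that land in it. A small sketch with arbitrary weights:

A = torch.tensor([0, 1, 1, 2])
w = torch.tensor([0.5, 1.0, 2.0, 0.25])
torch.bincount(A, weights=w)
# -> tensor([0.5000, 3.0000, 0.2500])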

Scatter Add

Given a destination tensor, scatter_add takes a src tensor of values and an index tensor saying which position of the destination each value should go to, then adds those values into the destination at those positions.

A = torch.tensor([1, 1, 34, 6, 88], dtype=torch.long)
links = torch.tensor([0, 0, 8, 0, 0], dtype=torch.long)
B = torch.zeros(10, dtype=torch.long)
 
A, B, B.scatter_add(dim=0, index=links, src=A)

Output:

(tensor([ 1,  1, 34,  6, 88]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 tensor([96,  0,  0,  0,  0,  0,  0,  0, 34,  0]))

Heaviside

Returns the output of the Heaviside step function: negative inputs map to 0 and positive inputs map to 1.

Wherever the input is exactly zero, the output takes the corresponding entry of the values tensor.

A = torch.tensor([-0.1, 0, 0, 1, 2, -2, 1, 0, 0, 0])
values = torch.tensor([10, 34, 3, 1, 1, 1, 4, 8, 7, 1], dtype=torch.float)
 
torch.heaviside(A, values=values)

Output:

tensor([ 0., 34.,  3.,  1.,  1.,  0.,  1.,  8.,  7.,  1.])

Bucketize

Places values into “buckets” defined by a vector of boundaries.

  • left bucket (default, right=False): a value exactly on a boundary falls into the bucket to the left of that boundary, i.e. boundaries[i-1] < v <= boundaries[i].

  • right bucket (right=True): a value exactly on a boundary falls into the bucket to the right of that boundary, i.e. boundaries[i-1] <= v < boundaries[i].

boundaries = torch.tensor([1, 4, 8, 10])
values = torch.tensor([3, 19, 10, 5, 8])
 
torch.bucketize(values, boundaries), torch.bucketize(values, boundaries, right=True)

Output:

(tensor([1, 4, 3, 2, 2]), tensor([1, 4, 4, 2, 3]))