import numpy as np

Sorting¶

#
# Suppose we have an ndarray
#
data = np.random.rand(10, 4)
data

array([[0.71788956, 0.68685904, 0.85567309, 0.2220448 ],
       [0.1235416 , 0.28665685, 0.61640495, 0.74180159],
       [0.56841641, 0.4769439 , 0.79715462, 0.51322344],
       [0.60981679, 0.84466326, 0.76103981, 0.96965829],
       [0.32337168, 0.10379839, 0.07114038, 0.03556087],
       [0.20234471, 0.94491698, 0.3947953 , 0.46696853],
       [0.41563938, 0.34823204, 0.73246262, 0.10737176],
       [0.2390011 , 0.96714859, 0.0850315 , 0.85092467],
       [0.97371006, 0.96347113, 0.76717253, 0.29075612],
       [0.86029426, 0.6977323 , 0.45472911, 0.29144504]])

#
# Sort the values within each row, i.e. sort along the last (column) axis.
# The default sorting axis is the last axis (axis=-1).
#
np.sort(data)

array([[0.2220448 , 0.68685904, 0.71788956, 0.85567309],
       [0.1235416 , 0.28665685, 0.61640495, 0.74180159],
       [0.4769439 , 0.51322344, 0.56841641, 0.79715462],
       [0.60981679, 0.76103981, 0.84466326, 0.96965829],
       [0.03556087, 0.07114038, 0.10379839, 0.32337168],
       [0.20234471, 0.3947953 , 0.46696853, 0.94491698],
       [0.10737176, 0.34823204, 0.41563938, 0.73246262],
       [0.0850315 , 0.2390011 , 0.85092467, 0.96714859],
       [0.29075612, 0.76717253, 0.96347113, 0.97371006],
       [0.29144504, 0.45472911, 0.6977323 , 0.86029426]])

#
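# Sort the values within each column, i.e. sort along the first (row) axis (axis=0).
# Note that each column is sorted independently, so the original rows are not kept together.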
#
np.sort(data, axis=0)

array([[0.1235416 , 0.10379839, 0.07114038, 0.03556087],
       [0.20234471, 0.28665685, 0.0850315 , 0.10737176],
       [0.2390011 , 0.34823204, 0.3947953 , 0.2220448 ],
       [0.32337168, 0.4769439 , 0.45472911, 0.29075612],
       [0.41563938, 0.68685904, 0.61640495, 0.29144504],
       [0.56841641, 0.6977323 , 0.73246262, 0.46696853],
       [0.60981679, 0.84466326, 0.76103981, 0.51322344],
       [0.71788956, 0.94491698, 0.76717253, 0.74180159],
       [0.86029426, 0.96347113, 0.79715462, 0.85092467],
       [0.97371006, 0.96714859, 0.85567309, 0.96965829]])

Argsort¶

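numpy.argsort returns the indices that would sort the ndarray, i.e. the positions in the original ndarray of its elements listed in sorted order. Like numpy.sort, it works along the last axis by default.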

np.argsort(data)

array([[3, 1, 0, 2],
       [0, 1, 2, 3],
       [1, 3, 0, 2],
       [0, 2, 1, 3],
       [3, 2, 1, 0],
       [0, 2, 3, 1],
       [3, 1, 0, 2],
       [2, 0, 3, 1],
       [3, 2, 1, 0],
       [3, 2, 1, 0]])

numpy.argsort is most useful when applied to a one-dimensional ndarray. Suppose we want to reorder the rows of data according to the values in the first column, while still keeping each row intact.

#
# This is the first column
#
data[:, 0]

array([0.71788956, 0.1235416 , 0.56841641, 0.60981679, 0.32337168,
       0.20234471, 0.41563938, 0.2390011 , 0.97371006, 0.86029426])

#
# We can sort it, but keep the index positions
#
sorted_idx = np.argsort(data[:, 0])
sorted_idx

array([1, 5, 7, 4, 6, 2, 3, 0, 9, 8])

#
# Now, we can use `sorted_idx` to rearrange data
#
data[sorted_idx, :]

array([[0.1235416 , 0.28665685, 0.61640495, 0.74180159],
       [0.20234471, 0.94491698, 0.3947953 , 0.46696853],
       [0.2390011 , 0.96714859, 0.0850315 , 0.85092467],
       [0.32337168, 0.10379839, 0.07114038, 0.03556087],
       [0.41563938, 0.34823204, 0.73246262, 0.10737176],
       [0.56841641, 0.4769439 , 0.79715462, 0.51322344],
       [0.60981679, 0.84466326, 0.76103981, 0.96965829],
       [0.71788956, 0.68685904, 0.85567309, 0.2220448 ],
       [0.86029426, 0.6977323 , 0.45472911, 0.29144504],
       [0.97371006, 0.96347113, 0.76717253, 0.29075612]])

#
# Let's sort in descending order.  This can be done by argsorting
# the negated column `-data[:, 0]` instead of `data[:, 0]`.
#
rev_sorted_idx = np.argsort(-data[:, 0])
rev_sorted_idx

array([8, 9, 0, 3, 2, 6, 4, 7, 5, 1])

#
# Rearrange the rows of data in descending order of the first column
#
data[rev_sorted_idx,:]

array([[0.97371006, 0.96347113, 0.76717253, 0.29075612],
       [0.86029426, 0.6977323 , 0.45472911, 0.29144504],
       [0.71788956, 0.68685904, 0.85567309, 0.2220448 ],
       [0.60981679, 0.84466326, 0.76103981, 0.96965829],
       [0.56841641, 0.4769439 , 0.79715462, 0.51322344],
       [0.41563938, 0.34823204, 0.73246262, 0.10737176],
       [0.32337168, 0.10379839, 0.07114038, 0.03556087],
       [0.2390011 , 0.96714859, 0.0850315 , 0.85092467],
       [0.20234471, 0.94491698, 0.3947953 , 0.46696853],
       [0.1235416 , 0.28665685, 0.61640495, 0.74180159]])

Check out numpy.lexsort, which acts like numpy.argsort but performs the comparison over multiple columns (keys). This is known as lexicographical sorting.

Linear Algebra¶

In linear algebra, we are primarily interested in matrices (ndarrays with 2 axes) and vectors (ndarrays with one axis).

Matrix multiplication¶

#
# Matrix multiplication
#

M = np.arange(12).reshape(3,4)
M

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

N = np.random.rand(2, 3)
N

array([[0.25383512, 0.77080372, 0.80431693],
       [0.23111028, 0.60623686, 0.2129683 ]])

#
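# We can multiply N x M, but not M x N: the inner dimensions must match.
# N is (2, 3) and M is (3, 4), so N x M has shape (2, 4), while M x N is undefined.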
#

np.dot(N, M)

array([[ 9.51775034, 11.34670611, 13.17566188, 15.00461765],
       [ 4.12869381,  5.17900925,  6.22932468,  7.27964012]])

np.dot(M, N)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-29-d9a30f01e225> in <module>
----> 1 np.dot(M, N)

<__array_function__ internals> in dot(*args, **kwargs)

ValueError: shapes (3,4) and (2,3) not aligned: 4 (dim 1) != 2 (dim 0)

#
# Matrix multiplication has a very convenient shorthand in Python.
#
# Use the `@` operator.
#
N @ M

array([[ 9.51775034, 11.34670611, 13.17566188, 15.00461765],
       [ 4.12869381,  5.17900925,  6.22932468,  7.27964012]])

M @ N

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-33-33fef8e94bc0> in <module>
----> 1 M @ N

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 4)

Matrix and vectors¶

v = np.random.rand(4)
v

array([0.11611099, 0.63996589, 0.64062225, 0.36352282])

M @ v

array([ 3.01177885, 10.05266664, 17.09355443])

N @ v

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-fdeebe536b34> in <module>
----> 1 N @ v

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 3)

The dot product of two vectors of the same length is defined as:

$$ u\cdot v = \sum_i u_i\times v_i$$

u = np.random.rand(4)
u

array([0.91007221, 0.61568061, 0.31500047, 0.27632087])

np.dot(u, v)

0.801929221536557

u @ v

0.801929221536557

#
# Don't forget that we can always perform element-wise multiplication of ndarrays of the same shape
#
u * v

array([0.10566938, 0.39401459, 0.20179631, 0.10044894])

Index

Selected Numpy Functions

Other Functions

Sorting¶

Argsort¶

Linear Algebra¶

Matrix multiplication¶

Matrix and vectors¶