log in
register

[Algorithm] 11 Sorting methods in Python

Qingqi@2020-11-26 #algorithm and data structure

# references:
#   https://brilliant.org/wiki/sorting-algorithms/
#   https://blog.csdn.net/xutiantian1412/article/details/100057566

# initial
import random
import numpy
from time import time
from copy import deepcopy


# Bubble Sort
# O(N^2)
def bubble_sort(nums):
    # for functions that changes the value of parameters
    # we'd better deepcopy those parameters
    nums = deepcopy(nums)
    for i in range(len(nums)-1):
        for j in range(len(nums)-i-1):
            if nums[j] > nums[j+1]:
                nums[j], nums[j+1] = nums[j+1], nums[j]
    return nums


# Selection Sort
# exchange the min value directly
# less move than Bubble Sort
# O(n^2)
def selection_sort(nums):
    nums = deepcopy(nums)
    for i in range(len(nums)-1):
        min_i = i
        for j in range(i+1, len(nums)):
            if nums[j] < nums[min_i]:
                min_i = j
        nums[i], nums[min_i] = nums[min_i], nums[i]
    return nums


# Quick Sort
# recursion, partition
# O(nlogn) unstable
def quick_sort(nums):
    if len(nums) <= 1:
        return nums
    left = []
    right = []
    pivot_list = []
    pivot = nums[0]
    for x in nums:
        if x < pivot:
            left.append(x)
        elif x > pivot:
            right.append(x)
        else:
            pivot_list.append(x)
    left = quick_sort(left)
    right = quick_sort(right)
    return left+pivot_list+right


# Insertion Sort
# O(n^2)
def insertion_sort(nums):
    nums = deepcopy(nums)
    for i in range(1, len(nums)):
        curr = nums[i]
        test_i = i-1
        while (test_i >= 0) and (curr < nums[test_i]):
            nums[test_i+1] = nums[test_i]
            test_i -= 1
        nums[test_i+1] = curr
    return nums


# Shell Sort
# let gap = 1, then it's just Insertion Sort
# O(n^(1 to 2))) depends on gap series
def shell_sort(nums):
    nums = deepcopy(nums)
    gap = len(nums)//2
    while gap > 0:
        for i in range(gap, len(nums)):
            curr = nums[i]
            test_i = i-gap
            while (test_i >= 0) and (curr < nums[test_i]):
                nums[test_i+gap] = nums[test_i]
                test_i -= gap
            nums[test_i+gap] = curr
        gap = gap//2
    return nums


# Heap Sort
# binary heap data structure
# for node i (start form 0), (i-1)//2 is parent node
# 2i+1 is leftchild node, 2i+2 is rightchild
# O(nLogn)
def adjust_heap(nums, i, n):
    # the max of parent and 2 children move to parent
    # then treat parent as the new
    left = 2*i+1
    right = 2*i+2
    max_ = i
    if (left < n) and (nums[left] > nums[max_]):
        max_ = left
    if (right < n) and (nums[right] > nums[max_]):
        max_ = right
    if max_ != i:
        nums[i], nums[max_] = nums[max_], nums[i]
        # after exchange, max_ is not max anymore
        # treat max_ as the new parent and recursion adjust
        adjust_heap(nums, max_, n)


def creat_heap(nums, n):
    # Max-heap: a node can't have a greater value than its parent
    for i in reversed(range(n//2)):
        adjust_heap(nums, i, n)


def heap_sort(nums):
    nums = deepcopy(nums)
    n = len(nums)
    creat_heap(nums, n)
    for i in reversed(range(n)):
        # exchange the max one (nums[0]) to i
        nums[0], nums[i] = nums[i], nums[0]
        # but the new nums[0] often not the new max
        # so adjust the unexhanged part
        adjust_heap(nums, 0, i)
    return nums


# Merge Sort
# (divide and sort and merge)
# recursion, partition
# O(nLogn)
def merge_sort(nums):
    n = len(nums)
    if n <= 1:
        return nums
    mid = n//2
    return merge(merge_sort(nums[:mid]), merge_sort(nums[mid:]))


def merge(left, right):
    result = []
    i_left = 0
    i_right = 0
    while i_left < len(left) and i_right < len(right):
        if left[i_left] <= right[i_right]:
            result.append(left[i_left])
            i_left += 1
        else:
            result.append(right[i_right])
            i_right += 1
    if i_left == len(left)-1:
        result.append(left[i_left])
    if i_right == len(right)-1:
        result.append(right[i_right])
    return result


# with list.pop() the code could be shorter
def merge1(left, right):
    result = []
    while left and right:
        if left[0] <= right[0]:
            result.append(left.pop(0))
        else:
            result.append(right.pop(0))
    if left:
        result.append(left.pop(0))
    if right:
        result.append(right.pop(0))
    return result


# Counting Sort
# the input nums must be non-negative integers with max value k known
# count the same numbers from 0 to k
# O(k+n), it's linear! but if k is huge, not good
def counting_sort(nums):
    # globle variable k should exist
    ints = [0]*(k+1)
    for x in nums:
        ints[x] += 1
    result = []
    for i, count in enumerate(ints):
        while count:
            result.append(i)
            count -= 1
    return result


# Bucket Sort
# divide into different range
# then sort each range
# O(n) to O(n^2), depends on the sort method in each bucket
# here I use shell_sort, and divide into same size ranges
# b: number of buckets
# if counting_sort, just like decrease the space complexity of counting_sort
def bucket_sort(nums):
    # use k//(b-1) to make sure there's b buckets
    # use '//' to filter valuse is not efficient, but
    # not important, there's others methods to divide buckets
    buckets = [[x for x in nums if x//(k//(b-1)) == i] for i in range(b)]
    buckets = [shell_sort(bucket) for bucket in buckets]
    return [x for bucket in buckets for x in bucket]


# Radix sort
# O(kn)
# non-comparing method
# d: biggest digital
# LSD (Least significant digital), start from ones digital
def lsd_sort(nums):
    nums = deepcopy(nums)
    for i in range(d):
        buckets = [[] for i in range(10)]
        for x in nums:
            buckets[x//10**i % 10].append(x)
        nums = [x for bucket in buckets for x in bucket]
    return nums


# MSD (Most significant digital), start form maximum digital
def msd_sort(nums):
    def bucket_and_merge(nums, d):
        if d == 0:
            return nums
        d -= 1
        buckets = [[] for i in range(10)]
        for x in nums:
            buckets[x//10**d % 10].append(x)
        for i in range(10):
            buckets[i] = bucket_and_merge(buckets[i], d)
        return [x for bucket in buckets for x in bucket]
    return bucket_and_merge(nums, d)


# test and compare


def runtime(f):
    start = time()
    sort_nums = f(nums)
    end = time()
    for i in range(len(sort_nums)-1):
        if sort_nums[i] > sort_nums[i+1]:
            return numpy.NaN
    return end - start


k = 300
nums = [random.randrange(0, k) for x in range(1000)]
print(runtime(bubble_sort))
print(runtime(selection_sort))
print(runtime(quick_sort))
print(runtime(insertion_sort))
print(runtime(shell_sort))
print(runtime(heap_sort))
print(runtime(merge_sort))
print(runtime(counting_sort))
b = 20
print(runtime(bucket_sort))
d = 3
print(runtime(lsd_sort))
print(runtime(msd_sort))

# bubble_sort 0.0727 s
# selection_s 0.0279 s
# quick_sort 0.0009 s
# insertion_s 0.0339 s
# shell_sort 0.0029 s
# heap_sort 0.0049 s
# merge_sort 0.0029 s
# counting_so 0.0009 s
# bucket_sort 0.0029 s
# lsd_sort 0.0009 s
# msd_sort 0.0019 s
Comments

Log in to add your comment

Don't have an account? register here