MNIST Handwritten Digit Recognition: Code Implementation

Posted by LemonWhale on December 28, 2023

1. Network Initialization

import numpy as np


class Network(object):
    def __init__(self, sizes):
        """
        sizes : number of layers and neurons per layer, e.g. [2, 3, 1] means three
        layers, with 2 neurons in the first, 3 in the second, and 1 in the third
        """
        self.num_layers = len(sizes)  # number of layers
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]  # initialize biases
        self.weights = [
            np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])
        ]  # initialize weights

Suppose we initialize net = Network([2, 3, 1]):

  1. The constructor takes sizes, a list of the number of neurons in each layer. For example, sizes = [2, 3, 1] describes a three-layer network with two neurons in the first layer, three in the second, and one in the last.
  2. self.biases = [np.random.randn(y, 1) for y in sizes[1:]] randomly initializes the biases, producing one bias vector per non-input layer, with shapes [(3, 1), (1, 1)]. Each neuron in a later layer gets one bias.
  3. self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])] randomly initializes the weights, producing one weight matrix per pair of adjacent layers, with shapes [(3, 2), (1, 3)]. There is one weight for every connection between two adjacent layers.

In summary, the number of biases and weights is determined by the number of neurons: if one layer has n neurons and the next layer has m, the connection between them uses m biases and m × n weights, as shown in the figure below.

(Figure: number of biases and weights)

For a layer with two inputs and three neurons, the weighted inputs are: \(\left\{ \begin{aligned} z_{1} &= w_{11}x_{1}+w_{12}x_{2}+b_{1} \\ z_{2} &= w_{21}x_{1}+w_{22}x_{2}+b_{2} \\ z_{3} &= w_{31}x_{1}+w_{32}x_{2}+b_{3}\end{aligned} \right.\) where \(w_{jk}\) is the weight from input \(x_{k}\) to neuron \(j\) (matching the row-by-output layout used by np.dot(w, a) in the code). In vector form: \(Z = WX + B\)
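For the net = Network([2, 3, 1]) example, these shapes can be checked directly. A minimal sketch, assuming the Network class above is defined and NumPy is imported:

import numpy as np

net = Network([2, 3, 1])

# One bias vector per non-input layer
print([b.shape for b in net.biases])   # [(3, 1), (1, 1)]

# One weight matrix per pair of adjacent layers
print([w.shape for w in net.weights])  # [(3, 2), (1, 3)]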

2. Feedforward

def feedforward(self, a):
        """
        Implements the feedforward pass

        Args:
            a : input vector
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

The feedforward computation for each layer is \(a'=\sigma(z)\), where \(z=w a + b\), and the activation function is the sigmoid: \(\sigma(z)=\frac{1}{1+e^{-z}}\)
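A minimal sketch of a single forward pass, assuming the Network class and the sigmoid helper from the full listing below are in scope (the input values are made up for illustration):

import numpy as np

net = Network([2, 3, 1])
x = np.array([[0.5], [0.8]])  # a (2, 1) input column vector

# one layer step done by hand ...
z = np.dot(net.weights[0], x) + net.biases[0]   # shape (3, 1)
a = 1.0 / (1.0 + np.exp(-z))                    # sigmoid applied elementwise

# ... which matches the first iteration inside feedforward
out = net.feedforward(x)                        # output of the last layer, shape (1, 1)
print(out.shape)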

3. Stochastic Gradient Descent

def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network using mini-batch stochastic gradient descent

        Args:
            training_data : training data
            epochs : number of training epochs
            mini_batch_size : size of one mini-batch
            eta : learning rate
            test_data : test data, defaults to None
        """

        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)  # shuffle the training data
            mini_batches = [
                training_data[k : k + mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]  # split into mini-batches
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)  # update on one mini-batch

            # report progress for this epoch
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

SGD: Stochastic Gradient Descent.
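Each call to update_mini_batch (shown in the full listing below) applies the averaged gradient-descent update over a mini-batch of m examples:

\[ w \rightarrow w - \frac{\eta}{m}\sum_{x}\frac{\partial C_{x}}{\partial w}, \qquad b \rightarrow b - \frac{\eta}{m}\sum_{x}\frac{\partial C_{x}}{\partial b} \]

where \(\eta\) is the learning rate eta and the per-example gradients \(\partial C_{x}/\partial w\), \(\partial C_{x}/\partial b\) are computed by backpropagation.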

4. Backpropagation
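The backprop method in the listing below implements the standard backpropagation equations for sigmoid activations and a quadratic cost (the notation here is added for reference, not from the original post): the output-layer error is \(\delta^{L}=(a^{L}-y)\odot\sigma'(z^{L})\), it propagates backwards as \(\delta^{l}=\big((w^{l+1})^{T}\delta^{l+1}\big)\odot\sigma'(z^{l})\), and the gradients are \(\partial C/\partial b^{l}=\delta^{l}\) and \(\partial C/\partial w^{l}=\delta^{l}\,(a^{l-1})^{T}\).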

import random

import numpy as np


class Network(object):
    def __init__(self, sizes):
        """
        sizes : 神经元的层数和数量, [2, 3, 1] 三层, 第一层2个神经元, 第二层3个神经元, 第三层1个神经元
        """
        self.num_layers = len(sizes)  # 神经元层数
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]  # 初始化偏置
        self.weights = [
            np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])
        ]  # 初始化权重

    def feedforward(self, a):
        """
        实现前向传播

        Args:
            a 输入向量
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """使用小批量随机梯度下降算法训练神经网络

        Args:
            training_data : 训练数据
            epochs : 训练轮数
            mini_batch : 一个batch的大小
            eta : learning rate 学习率
            test_data : 测试数据, 默认为None
        """

        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)  # 随机打乱
            mini_batchs = [
                training_data[k : k + mini_batch_size]
                for k in range(0, n, mini_batch_size)
            ]  # 生成训练用的batchs
            for mini_batch in mini_batchs:
                self.update_mini_batch(mini_batch, eta)  # 更新batch

            # 打印当前批次信息
            if test_data:
                print("Epoch{0}:{1}/{2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch{} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """更新神经网络的偏置和权重

        Args:
            mini_batch : 小批数据
            eta : learning rate 学习率
        """

        nable_b = [np.zeros(b.shape) for b in self.biases]
        nable_w = [np.zeros(w.shape) for w in self.weights]

        for x, y in mini_batch:
            delta_nable_b, delta_nable_w = self.backprop(x, y)
            nable_b = [nb + dnb for nb, dnb in zip(nable_b, delta_nable_b)]
            nable_w = [nw + dnw for nw, dnw in zip(nable_w, delta_nable_w)]
        self.weights = [
            w - (eta / len(mini_batch)) * nw for w, nw in zip(self.weights, nable_w)
        ]
        self.biases = [
            b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nable_b)
        ]

    def backprop(self, x, y):
        """反向传播函数

        Args:
            x (_type_): _description_
            y (_type_): _description_
        """
        nable_b = [np.zeros(b.shape) for b in self.biases]
        nabel_w = [np.zeros(w.shape) for w in self.weights]

        # 前馈
        activation = x
        activations = [x]
        zs = []

        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # 反向传播
        delta = self.cost_derivative(activation[-1], y) * sigmoid_prime(zs[-1])
        nable_b[-1] = delta
        nabel_w[-1] = np.dot(delta, activations[-2].transpose())

        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nable_b[-l] = delta
            nabel_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return (nable_b, nabel_w)

    def evaluate(self, test_data):
        """验证函数

        Args:
            test_data : 验证集
        """

        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]

        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """计算激活值的偏导数"""
        return output_activations - y


def sigmoid(z):
    """sigmoid函数"""
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    """sigmoid函数的导数"""
    return sigmoid(z) * (1 - sigmoid(z))
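To tie it together, here is a hedged usage sketch. The post targets MNIST (784-pixel inputs, 10 output classes) but does not show the data loader, so the snippet below simply fabricates random data in the format SGD expects: a list of (x, y) tuples where x has shape (784, 1); for training_data, y is a one-hot (10, 1) vector (as backprop's output - y implies), while for test_data, y is the integer label (as evaluate's argmax comparison implies).

import numpy as np

# Fabricated stand-in data, just to exercise the interface
def fake_example():
    x = np.random.rand(784, 1)
    label = np.random.randint(10)
    y = np.zeros((10, 1))
    y[label] = 1.0
    return (x, y), (x, label)

pairs = [fake_example() for _ in range(1000)]
training_data = [p[0] for p in pairs]   # (x, one-hot y)
test_data = [p[1] for p in pairs]       # (x, integer label)

net = Network([784, 30, 10])
net.SGD(training_data, epochs=3, mini_batch_size=10, eta=3.0, test_data=test_data)

With real MNIST data this same call is all that is needed; only the loading of training_data and test_data changes.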