文章目录
Tensorflow笔记1 常用函数1.1 tf.where()1.2 np.mgrid()1.3 tf.nn.softmax_cross_entropy_with_logits()1.4 model.compile()
2 网络的实现2.1 基础框架——Sequential 和 Class 网络框架2.1.1 tf.keras.models.Sequential()2.1.2 class MyModel (Model)
2.2 功能扩展后的网络2.2.1 构造数据集2.2.2 图片数据增强2.2.3 断点续训、输出网络参数、输出loss曲线2.2.4 手写数字识别——model.predict()
2.3 卷积神经网络2.3.1 卷积——特征提取器2.3.2 卷积层的Tensorflow表示2.3.3 批标准化 (Batch Normalization, BN)2.3.3 批标准化的Tensorflow表示2.3.4 池化(pooling)2.3.5 池化层的Tensorflow表示2.3.6 舍弃(dropout)2.3.7 完整的卷积神经网络——“CBAPD”
2.4 常见的经典卷积神经网络2.4.1 加载cifar10数据集2.4.1 baseline2.4.3 LeNet52.4.4 AlexNet82.4.5 VGGNet162.4.6 Inception102.4.7 ResNet18
3 各知识点代码实现3.1 常用的优化器的代码实现3.1.1 SGD3.1.2 SGDM3.1.3 Adam3.1.4 Adagrad3.1.5 Rmsprop
3.2 梯度更新
Tensorflow笔记
1 常用函数
1.1 tf.where()
# Demo: tf.where(condition, a, b) picks element-wise from `a` where the
# condition is True, otherwise from `b` — here an element-wise maximum.
import tensorflow as tf

a = tf.constant([1, 2, 3, 1, 1])
b = tf.constant([0, 1, 3, 4, 5])
# tf.greater(a, b) builds a boolean mask; tf.where selects a[i] when
# a[i] > b[i], else b[i].
c = tf.where(tf.greater(a, b), a, b)
print("c:", c)
c: tf.Tensor([1 2 3 4 5], shape=(5,), dtype=int32)
1.2 np.mgrid()
# Demo: np.mgrid builds coordinate matrices; np.c_ pairs the flattened
# coordinates into (row, col) grid points.
# (The original also imported tensorflow, which was never used here.)
import numpy as np

# mgrid[start:stop:step, ...] — stop is exclusive, so x covers 1, 2 and
# y covers 2, 2.5, 3, 3.5; both come back as 2x4 matrices.
x, y = np.mgrid[1:3:1, 2:4:0.5]
# ravel() flattens each matrix; np.c_ stacks them column-wise into
# an (8, 2) array of coordinate pairs.
grid = np.c_[x.ravel(), y.ravel()]
print("x:\n", x)
print("y:\n", y)
print("x.ravel():\n", x.ravel())
print("y.ravel():\n", y.ravel())
print('grid:\n', grid)
x: [[1. 1. 1. 1.] [2. 2. 2. 2.]] y: [[2. 2.5 3. 3.5] [2. 2.5 3. 3.5]]
x.ravel(): [1. 1. 1. 1. 2. 2. 2. 2.] y.ravel(): [2. 2.5 3. 3.5 2. 2.5 3. 3.5]
grid: [[1. 2. ] [1. 2.5] [1. 3. ] [1. 3.5] [2. 2. ] [2. 2.5] [2. 3. ] [2. 3.5]]
1.3 tf.nn.softmax_cross_entropy_with_logits()
# Demo: tf.nn.softmax_cross_entropy_with_logits fuses softmax + cross-entropy
# in one (numerically more stable) op; compare against doing it in two steps.
import tensorflow as tf
import numpy as np

# One-hot labels for 5 samples over 3 classes, and the raw network
# outputs (logits) for the same samples.
y_ = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]])
y = np.array([[12, 3, 2], [3, 10, 1], [1, 2, 5], [4, 6.5, 1.2], [3, 6, 1]])

# Step-by-step: softmax first, then categorical cross-entropy on probabilities.
y_pro = tf.nn.softmax(y)
loss_ce1 = tf.losses.categorical_crossentropy(y_, y_pro)
# Fused: softmax and cross-entropy in a single call on the logits.
loss_ce2 = tf.nn.softmax_cross_entropy_with_logits(y_, y)

print('分步计算的结果:\n', loss_ce1)
print('结合计算的结果:\n', loss_ce2)
1.4 model.compile()
model.compile(optimizer = 优化器,
              loss = 损失函数,
              metrics = ["准确率"])
其中:
optimizer可以是字符串形式给出的优化器名字,也可以是函数形式,使用函数形式可以设置学习率、动量和超参数
例如:
"sgd" 或者 tf.keras.optimizers.SGD(lr = 学习率, decay = 学习率衰减率, momentum = 动量参数)
"adagrad" 或者 tf.keras.optimizers.Adagrad(lr = 学习率, decay = 学习率衰减率)
"adadelta" 或者 tf.keras.optimizers.Adadelta(lr = 学习率, decay = 学习率衰减率)
"adam" 或者 tf.keras.optimizers.Adam(lr = 学习率, decay = 学习率衰减率)
loss可以是字符串形式给出的损失函数的名字,也可以是函数形式
例如:
"mse" 或者 tf.keras.losses.MeanSquaredError()
"sparse_categorical_crossentropy" 或者 tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False) 损失函数经常需要使用softmax函数来将输出转化为概率分布的形式,在这里from_logits代表是否将输出转为概率分布的形式,为False时表示转换为概率分布,为True时表示不转换,直接输出
Metrics标注网络评价指标
例如:
“accuracy” : y_ 和 y 都是数值,如y_ = [1] y = [1] #y_为真实值,y为预测值
"categorical_accuracy":y_和y都是以独热码和概率分布表示,如y_ = [0, 1, 0], y = [0.256, 0.695, 0.048]
"sparse_categorical_accuracy":y_是以数值形式给出,y是以概率分布给出,如y_ = [1], y = [0.256, 0.695, 0.048]
2 网络的实现
本节按网络的复杂程度进行内容编写。
2.1 基础框架——Sequential 和 Class 网络框架
使用Mnist数据集。下面给出最基本的框架样例。
2.1.1 tf.keras.models.Sequential()
# Minimal MNIST classifier built with the Sequential API.
import tensorflow as tf

# Load MNIST and scale pixel values from [0, 255] to [0, 1].
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Flatten 28x28 images -> one hidden ReLU layer -> softmax over 10 digits.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# from_logits=False because the last layer already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test), validation_freq=1)
model.summary()
2.1.2 class MyModel (Model)
# Same MNIST classifier as the Sequential version, written with the
# class (subclassing) API.
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Model

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Scale pixels from [0, 255] to [0, 1].
x_train, x_test = x_train / 255.0, x_test / 255.0


class MnistModel(Model):
    """Flatten -> Dense(128, relu) -> Dense(10, softmax)."""

    def __init__(self):
        super(MnistModel, self).__init__()
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.flatten(x)
        x = self.d1(x)
        y = self.d2(x)
        return y


model = MnistModel()

# from_logits=False because the output layer already applies softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test), validation_freq=1)
model.summary()
2.2 功能扩展后的网络
2.2.1 构造数据集
标签文件格式如下所示:
文件名 标签
1_9.jpg 9 2_0.jpg 0 3_0.jpg 0 4_3.jpg 3 5_0.jpg 0 6_2.jpg 2 7_7.jpg 7 8_2.jpg 2 9_5.jpg 5 10_5.jpg 5
# Build a Fashion-MNIST-style dataset from jpg files + a "filename label"
# text file, caching the arrays as .npy so later runs skip regeneration,
# then train a simple dense classifier on it.
import tensorflow as tf
from PIL import Image
import numpy as np
import os

train_path = './fashion_image_label/fashion_train_jpg_60000/'
train_txt = './fashion_image_label/fashion_train_jpg_60000.txt'
x_train_savepath = './fashion_image_label/fashion_x_train.npy'
# NOTE(review): 'fahion' spelling kept as in the original — changing it
# would orphan any cache file already saved under this name.
y_train_savepath = './fashion_image_label/fahion_y_train.npy'

test_path = './fashion_image_label/fashion_test_jpg_10000/'
test_txt = './fashion_image_label/fashion_test_jpg_10000.txt'
x_test_savepath = './fashion_image_label/fashion_x_test.npy'
y_test_savepath = './fashion_image_label/fashion_y_test.npy'


def generateds(path, txt):
    """Read a 'filename label' text file and return (images, labels).

    Images are loaded from `path`, converted to grayscale and scaled to
    [0, 1]; labels are returned as an int64 numpy array.
    """
    with open(txt, 'r') as f:
        contents = f.readlines()
    x, y_ = [], []
    for content in contents:
        value = content.split()          # value[0] = filename, value[1] = label
        img_path = path + value[0]
        img = Image.open(img_path)
        img = np.array(img.convert('L'))  # 'L' = 8-bit grayscale
        img = img / 255.
        x.append(img)
        y_.append(value[1])
        print('loading : ' + content)
    x = np.array(x)
    y_ = np.array(y_)
    y_ = y_.astype(np.int64)
    return x, y_


# Load the cached arrays if all four files exist, else regenerate and cache.
if os.path.exists(x_train_savepath) and os.path.exists(y_train_savepath) and os.path.exists(
        x_test_savepath) and os.path.exists(y_test_savepath):
    print('-------------Load Datasets-----------------')
    x_train_save = np.load(x_train_savepath)
    y_train = np.load(y_train_savepath)
    x_test_save = np.load(x_test_savepath)
    y_test = np.load(y_test_savepath)
    # Cached images are stored flattened; restore the 28x28 shape.
    x_train = np.reshape(x_train_save, (len(x_train_save), 28, 28))
    x_test = np.reshape(x_test_save, (len(x_test_save), 28, 28))
else:
    print('-------------Generate Datasets-----------------')
    x_train, y_train = generateds(train_path, train_txt)
    x_test, y_test = generateds(test_path, test_txt)

    print('-------------Save Datasets-----------------')
    # Flatten each image to one row before saving.
    x_train_save = np.reshape(x_train, (len(x_train), -1))
    x_test_save = np.reshape(x_test, (len(x_test), -1))
    np.save(x_train_savepath, x_train_save)
    np.save(y_train_savepath, y_train)
    np.save(x_test_savepath, x_test_save)
    np.save(y_test_savepath, y_test)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test), validation_freq=1)
model.summary()
2.2.2 图片数据增强
# Demo: image data augmentation with ImageDataGenerator, showing 12
# original Fashion-MNIST images next to their augmented versions.
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Add the channel dimension: (N, 28, 28) -> (N, 28, 28, 1), as required
# by ImageDataGenerator.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

image_gen_train = ImageDataGenerator(
    rescale=1. / 255,          # scale pixels to [0, 1]
    rotation_range=45,         # random rotation up to 45 degrees
    width_shift_range=.15,     # random horizontal shift (fraction of width)
    height_shift_range=.15,    # random vertical shift (fraction of height)
    horizontal_flip=True,      # random left-right flip
    zoom_range=0.5             # random zoom in [0.5, 1.5]
)
image_gen_train.fit(x_train)

x_train_subset1 = np.squeeze(x_train[:12])  # drop channel dim for imshow
x_train_subset2 = x_train[:12]

# Row of the 12 original images.
fig = plt.figure(figsize=(20, 2))
plt.set_cmap('gray')
for i in range(0, len(x_train_subset1)):
    ax = fig.add_subplot(1, 12, i + 1)
    ax.imshow(x_train_subset1[i])
fig.suptitle('Subset of Original Training Images', fontsize=20)
plt.show()

# Row of the same 12 images after one pass of augmentation; `break`
# stops after the first generated batch.
fig = plt.figure(figsize=(20, 2))
for x_batch in image_gen_train.flow(x_train_subset2, batch_size=12, shuffle=False):
    for i in range(0, 12):
        ax = fig.add_subplot(1, 12, i + 1)
        ax.imshow(np.squeeze(x_batch[i]))
    fig.suptitle('Augmented Images', fontsize=20)
    plt.show()
    break
2.2.3 断点续训、输出网络参数、输出loss曲线
# Fashion-MNIST training with: checkpoint resume (断点续训), dumping all
# trainable variables to a text file, and plotting accuracy/loss curves.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt

# Print full arrays (no "..." truncation) when dumping weights.
np.set_printoptions(threshold=np.inf)

fashion = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/fashion.ckpt"
# A saved checkpoint leaves a ".index" file next to the data shards —
# its presence means we can resume from previous weights.
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train,
                    batch_size=32,
                    epochs=5,
                    validation_data=(x_test, y_test),
                    validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

print(model.trainable_variables)
# Dump every trainable variable (name, shape, values) to a text file.
with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

# Plot training vs. validation accuracy and loss.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.2.4 手写数字识别——model.predict()
# Interactive inference: load trained weights, then repeatedly read an
# image path, preprocess the image, and print the predicted class name.
from PIL import Image
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Fashion-MNIST class names, indexed by predicted label.
# (Renamed from `type`, which shadowed the Python builtin.)
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

model_save_path = './checkpoint/fashion.ckpt'
# Must match the architecture the checkpoint was trained with.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.load_weights(model_save_path)

preNum = int(input("input the number of test pictures:"))
for i in range(preNum):
    image_path = input("the path of test picture:")
    img = Image.open(image_path)

    image = plt.imread(image_path)
    plt.set_cmap('gray')
    plt.imshow(image)

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS has been
    # its alias (same filter) since Pillow 2.7.
    img = img.resize((28, 28), Image.LANCZOS)
    img_arr = np.array(img.convert('L'))
    # Training images are light-on-dark; invert so a dark-on-light photo
    # matches the training distribution.
    img_arr = 255 - img_arr
    img_arr = img_arr / 255.0

    # Add a batch dimension: (28, 28) -> (1, 28, 28).
    x_predict = img_arr[tf.newaxis, ...]
    result = model.predict(x_predict)
    pred = tf.argmax(result, axis=1)

    print('\n')
    print(class_names[int(pred)])
    plt.pause(1)
    plt.close()
2.3 卷积神经网络
卷积神经网络:借助卷积核提取特征后,送入全连接网络。
框架
2.3.1 卷积——特征提取器
实际应用时会先对原始图像进行特征提取再把提取到的特征送给全连接网络。
卷积核
一般用两个3*3的卷积核代替一个5*5的卷积核,因为这样子计算量会减小若是三通道的图片,那么需要深度为3的卷积核(三个卷积核)
▲ 不同类型的卷积核
卷积的计算
感受野
全零填充
2.3.2 卷积层的Tensorflow表示
tf.keras.layers.Conv2D(
    filters = 卷积核个数,
    kernel_size = 卷积核尺寸,
    strides = 滑动步长,
    padding = "same" or "valid",
    activation = "relu" or "sigmoid" or "tanh" or "softmax" 等,
    input_shape = (高, 宽, 通道数)
)
在网络中卷积层的三种定义方式:
# Three equivalent ways to write a Conv2D/MaxPool2D pair: positional
# scalars, positional tuples, and (recommended) keyword arguments.
model = tf.keras.models.Sequential([
    # Style 1: positional scalar arguments.
    Conv2D(6, 5, padding='valid', activation='sigmoid'),
    MaxPool2D(2, 2),
    # Style 2: positional tuple arguments.
    Conv2D(6, (5, 5), padding='valid', activation='sigmoid'),
    MaxPool2D(2, (2, 2)),
    # Style 3: explicit keyword arguments — most readable.
    Conv2D(filters=6, kernel_size=(5, 5), padding='valid', activation='sigmoid'),
    MaxPool2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(10, activation='softmax')
])
推荐第三种表达方式,代码可读性较高。
2.3.3 批标准化 (Batch Normalization, BN)
BN层位于卷积层之后,激活层之前。
2.3.3 批标准化的Tensorflow表示
tf.keras.layers.BatchNormalization()
# "CBAPD" unit with batch normalization placed between the convolution
# and the activation.
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # C: convolution
    BatchNormalization(),                                    # B: batch norm (before activation)
    Activation('relu'),                                      # A: activation
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # P: pooling
    Dropout(0.2),                                            # D: dropout
])
2.3.4 池化(pooling)
池化用于减少特征数据量。 最大值池化可提取图片纹理,均值池化可保留背景特征。
2.3.5 池化层的Tensorflow表示
tf.keras.layers.MaxPool2D(
    pool_size = 池化核尺寸,
    strides = 池化步长,
    padding = 'valid' or 'same'
)
tf.keras.layers.AveragePooling2D(
    pool_size = 池化核尺寸,
    strides = 池化步长,
    padding = 'valid' or 'same'
)
# Example network using a max-pooling layer inside the "CBAPD" pattern.
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),   # convolution
    BatchNormalization(),                                    # batch norm
    Activation('relu'),                                      # activation
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),  # pooling halves H and W
    Dropout(0.2),                                            # dropout
])
2.3.6 舍弃(dropout)
在神经网络训练时,将一部分神经元按照一定概率从神经网络中暂时舍弃。神经网络使用时,被舍弃的神经元恢复链接。
tf.keras.layers.Dropout(舍弃的概率)
# Example network ending with a Dropout layer: 20% of activations are
# randomly zeroed during training (restored at inference time).
model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),
    Dropout(0.2),  # drop rate 0.2
])
2.3.7 完整的卷积神经网络——“CBAPD”
代码:
# Full CIFAR-10 example: a "CBAPD" baseline CNN with checkpoint resume,
# weight dump, and accuracy/loss curves.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)  # no truncation in the weight dump

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class Baseline(Model):
    """One CBAPD block (Conv-BN-Activation-Pool-Dropout) + two dense layers."""

    def __init__(self):
        super(Baseline, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5), padding='same')  # C
        self.b1 = BatchNormalization()                                   # B
        self.a1 = Activation('relu')                                     # A
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same') # P
        self.d1 = Dropout(0.2)                                           # D
        self.flatten = Flatten()
        self.f1 = Dense(128, activation='relu')
        self.d2 = Dropout(0.2)
        self.f2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.d1(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d2(x)
        y = self.f2(x)
        return y


model = Baseline()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/Baseline.ckpt"
# Resume from a previous run if a checkpoint exists.
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

# Dump all trainable variables to a text file.
with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

# Plot training vs. validation accuracy and loss.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4 常见的经典卷积神经网络
以下代码均使用cifar10数据集。 本节按各卷积神经网络出现的时间顺序编写。
Tips:
编写神经网络的时候,可以先把它的框架结构表示出来,然后再根据框架编写代码。牢记"CBAPD"卷积神经网络结构。
2.4.1 加载cifar10数据集
# Demo: load CIFAR-10, show the first training image, and print shapes.
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np

np.set_printoptions(threshold=np.inf)  # print full arrays

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

plt.imshow(x_train[0])  # first 32x32x3 training image
plt.show()

print("x_train[0]:\n", x_train[0])
print("y_train[0]:\n", y_train[0])
print("x_train.shape:\n", x_train.shape)
print("y_train.shape:\n", y_train.shape)
print("x_test.shape:\n", x_test.shape)
print("y_test.shape:\n", y_test.shape)
2.4.1 baseline
各卷积神经网络的基础框架,不同的卷积神经网络的网络结构部分不同。
# Baseline CNN for CIFAR-10 (same structure repeated as the section's
# reference framework): CBAPD block + dense head, with checkpoint resume,
# weight dump, and training curves.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class Baseline(Model):
    """Conv-BN-Activation-Pool-Dropout block followed by two dense layers."""

    def __init__(self):
        super(Baseline, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5), padding='same')
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d1 = Dropout(0.2)
        self.flatten = Flatten()
        self.f1 = Dense(128, activation='relu')
        self.d2 = Dropout(0.2)
        self.f2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.d1(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d2(x)
        y = self.f2(x)
        return y


model = Baseline()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/Baseline.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4.3 LeNet5
# LeNet-5 on CIFAR-10: two conv+pool stages followed by three dense layers
# (sigmoid activations, as in the original 1998 design).
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class LeNet5(Model):
    """Conv(6) -> Pool -> Conv(16) -> Pool -> Dense(120/84/10)."""

    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2)
        self.c2 = Conv2D(filters=16, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2)
        self.flatten = Flatten()
        self.f1 = Dense(120, activation='sigmoid')
        self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.p1(x)
        x = self.c2(x)
        x = self.p2(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.f2(x)
        y = self.f3(x)
        return y


model = LeNet5()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/LeNet5.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

# Dump trainable variables (name, shape, values) to a text file.
with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

# Training curves.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4.4 AlexNet8
# AlexNet-8 on CIFAR-10: five conv layers (first two with BN) and three
# dense layers with dropout.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class AlexNet8(Model):
    """8-layer AlexNet variant adapted to 32x32 CIFAR-10 inputs."""

    def __init__(self):
        super(AlexNet8, self).__init__()
        self.c1 = Conv2D(filters=96, kernel_size=(3, 3))
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(3, 3), strides=2)

        self.c2 = Conv2D(filters=256, kernel_size=(3, 3))
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(3, 3), strides=2)

        # Three stacked convs without pooling in between.
        self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
                         activation='relu')
        self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
                         activation='relu')
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same',
                         activation='relu')
        self.p3 = MaxPool2D(pool_size=(3, 3), strides=2)

        self.flatten = Flatten()
        self.f1 = Dense(2048, activation='relu')
        self.d1 = Dropout(0.5)
        self.f2 = Dense(2048, activation='relu')
        self.d2 = Dropout(0.5)
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p2(x)
        x = self.c3(x)
        x = self.c4(x)
        x = self.c5(x)
        x = self.p3(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d1(x)
        x = self.f2(x)
        x = self.d2(x)
        y = self.f3(x)
        return y


model = AlexNet8()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/AlexNet8.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4.5 VGGNet16
# VGG-16 on CIFAR-10: 13 conv layers (each Conv-BN-ReLU) in five blocks
# separated by max-pooling + dropout, then three dense layers.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class VGG16(Model):
    """VGG-16: filters double per block (64 -> 128 -> 256 -> 512 -> 512)."""

    def __init__(self):
        super(VGG16, self).__init__()
        # Block 1: two 64-filter convs.
        self.c1 = Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.c2 = Conv2D(filters=64, kernel_size=(3, 3), padding='same', )
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d1 = Dropout(0.2)
        # Block 2: two 128-filter convs.
        self.c3 = Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b3 = BatchNormalization()
        self.a3 = Activation('relu')
        self.c4 = Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b4 = BatchNormalization()
        self.a4 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d2 = Dropout(0.2)
        # Block 3: three 256-filter convs.
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b5 = BatchNormalization()
        self.a5 = Activation('relu')
        self.c6 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b6 = BatchNormalization()
        self.a6 = Activation('relu')
        self.c7 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b7 = BatchNormalization()
        self.a7 = Activation('relu')
        self.p3 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d3 = Dropout(0.2)
        # Block 4: three 512-filter convs.
        self.c8 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b8 = BatchNormalization()
        self.a8 = Activation('relu')
        self.c9 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b9 = BatchNormalization()
        self.a9 = Activation('relu')
        self.c10 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b10 = BatchNormalization()
        self.a10 = Activation('relu')
        self.p4 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d4 = Dropout(0.2)
        # Block 5: three 512-filter convs.
        self.c11 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b11 = BatchNormalization()
        self.a11 = Activation('relu')
        self.c12 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b12 = BatchNormalization()
        self.a12 = Activation('relu')
        self.c13 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b13 = BatchNormalization()
        self.a13 = Activation('relu')
        self.p5 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d5 = Dropout(0.2)
        # Classifier head.
        self.flatten = Flatten()
        self.f1 = Dense(512, activation='relu')
        self.d6 = Dropout(0.2)
        self.f2 = Dense(512, activation='relu')
        self.d7 = Dropout(0.2)
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p1(x)
        x = self.d1(x)
        x = self.c3(x)
        x = self.b3(x)
        x = self.a3(x)
        x = self.c4(x)
        x = self.b4(x)
        x = self.a4(x)
        x = self.p2(x)
        x = self.d2(x)
        x = self.c5(x)
        x = self.b5(x)
        x = self.a5(x)
        x = self.c6(x)
        x = self.b6(x)
        x = self.a6(x)
        x = self.c7(x)
        x = self.b7(x)
        x = self.a7(x)
        x = self.p3(x)
        x = self.d3(x)
        x = self.c8(x)
        x = self.b8(x)
        x = self.a8(x)
        x = self.c9(x)
        x = self.b9(x)
        x = self.a9(x)
        x = self.c10(x)
        x = self.b10(x)
        x = self.a10(x)
        x = self.p4(x)
        x = self.d4(x)
        x = self.c11(x)
        x = self.b11(x)
        x = self.a11(x)
        x = self.c12(x)
        x = self.b12(x)
        x = self.a12(x)
        x = self.c13(x)
        x = self.b13(x)
        x = self.a13(x)
        x = self.p5(x)
        x = self.d5(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.d6(x)
        x = self.f2(x)
        x = self.d7(x)
        y = self.f3(x)
        return y


model = VGG16()

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

checkpoint_save_path = "./checkpoint/VGG16.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

with open('./weights.txt', 'w') as f:
    for v in model.trainable_variables:
        f.write(str(v.name) + '\n')
        f.write(str(v.shape) + '\n')
        f.write(str(v.numpy()) + '\n')

acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4.6 Inception10
# Inception10 building blocks on CIFAR-10: a Conv-BN-ReLU helper, an
# Inception block with four parallel branches, and the 10-layer network.
import tensorflow as tf
import os
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense, \
    GlobalAveragePooling2D
from tensorflow.keras import Model

np.set_printoptions(threshold=np.inf)

cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


class ConvBNRelu(Model):
    """Conv2D -> BatchNormalization -> ReLU, as a reusable unit."""

    def __init__(self, ch, kernelsz=3, strides=1, padding='same'):
        super(ConvBNRelu, self).__init__()
        self.model = tf.keras.models.Sequential([
            Conv2D(ch, kernelsz, strides=strides, padding=padding),
            BatchNormalization(),
            Activation('relu')
        ])

    def call(self, x):
        # training=False here keeps BN in inference mode during the forward
        # pass (as written in the original example).
        x = self.model(x, training=False)
        return x


class InceptionBlk(Model):
    """Four parallel branches (1x1 / 1x1+3x3 / 1x1+5x5 / pool+1x1),
    concatenated along the channel axis."""

    def __init__(self, ch, strides=1):
        super(InceptionBlk, self).__init__()
        self.ch = ch
        self.strides = strides
        self.c1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c2_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c2_2 = ConvBNRelu(ch, kernelsz=3, strides=1)
        self.c3_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c3_2 = ConvBNRelu(ch, kernelsz=5, strides=1)
        self.p4_1 = MaxPool2D(3, strides=1, padding='same')
        self.c4_2 = ConvBNRelu(ch, kernelsz=1, strides=strides)

    def call(self, x):
        x1 = self.c1(x)
        x2_1 = self.c2_1(x)
        x2_2 = self.c2_2(x2_1)
        x3_1 = self.c3_1(x)
        x3_2 = self.c3_2(x3_1)
        x4_1 = self.p4_1(x)
        x4_2 = self.c4_2(x4_1)
        # Stack branch outputs channel-wise (axis 3 = channels, NHWC).
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=3)
        return x


class Inception10(Model):
    """Stem conv + num_blocks pairs of Inception blocks (the first of each
    pair downsamples with strides=2 and the channel count then doubles),
    finished with global average pooling and a softmax classifier."""

    def __init__(self, num_blocks, num_classes, init_ch=16, **kwargs):
        super(Inception10, self).__init__(**kwargs)
        self.in_channels = init_ch
        self.out_channels = init_ch
        self.num_blocks = num_blocks
        self.init_ch = init_ch
        self.c1 = ConvBNRelu(init_ch)
        self.blocks = tf.keras.models.Sequential()
        for block_id in range(num_blocks):
            for layer_id in range(2):
                if layer_id == 0:
                    # First block of the pair halves the spatial size.
                    block = InceptionBlk(self.out_channels, strides=2)
                else:
                    block = InceptionBlk(self.out_channels, strides=1)
                self.blocks.add(block)
            # Double the channels for the next pair of blocks.
            self.out_channels *= 2
        self.p1 = GlobalAveragePooling2D()
        self.f1 = Dense(num_classes, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.blocks(x)
        x = self.p1(x)
        y = self.f1(x)
        return y
# Build, train, checkpoint, and evaluate Inception10 on CIFAR-10,
# then dump trainable weights and plot the accuracy/loss curves.
model = Inception10(num_blocks=2, num_classes=10)

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

# Resume from an existing checkpoint if one was saved by a previous run.
checkpoint_save_path = "./checkpoint/Inception10.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

# Fix: the original used a bare open()/close() pair, leaking the file handle
# if any write raised; a context manager guarantees the file is closed.
with open('./weights.txt', 'w') as weights_file:
    for v in model.trainable_variables:
        weights_file.write(str(v.name) + '\n')
        weights_file.write(str(v.shape) + '\n')
        weights_file.write(str(v.numpy()) + '\n')

# Plot training/validation accuracy and loss side by side.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
2.4.7 ResNet18
import os

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras.layers import (Conv2D, BatchNormalization, Activation,
                                     MaxPool2D, Dropout, Flatten, Dense)

# Print arrays in full (no truncation) for the later weight dump.
np.set_printoptions(threshold=np.inf)

# Load CIFAR-10 and normalize pixels into [0, 1].
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
class ResnetBlock(Model):
    """Basic residual block: two 3x3 convs with an identity or 1x1
    projection shortcut, followed by ReLU on the sum.

    Args:
        filters: output channels of both convolutions.
        strides: stride of the first conv (2 when downsampling).
        residual_path: when True, project the shortcut with a strided
            1x1 conv + BN so its shape matches the main path.
    """

    def __init__(self, filters, strides=1, residual_path=False):
        super(ResnetBlock, self).__init__()
        self.filters = filters
        self.strides = strides
        self.residual_path = residual_path
        # Main path: conv -> BN -> ReLU -> conv -> BN (no bias; BN follows).
        self.c1 = Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.c2 = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b2 = BatchNormalization()
        # Shortcut projection, built only when shapes change.
        if residual_path:
            self.down_c1 = Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = BatchNormalization()
        self.a2 = Activation('relu')

    def call(self, inputs):
        shortcut = inputs
        if self.residual_path:
            shortcut = self.down_b1(self.down_c1(inputs))
        main = self.b2(self.c2(self.a1(self.b1(self.c1(inputs)))))
        # Element-wise sum of main path and shortcut, then final ReLU.
        return self.a2(main + shortcut)
class ResNet18(Model):
    """ResNet-18-style classifier for CIFAR-10.

    Args:
        block_list: residual blocks per stage, e.g. [2, 2, 2, 2].
        initial_filters: channel count of the stem / first stage (default 64).
    """

    def __init__(self, block_list, initial_filters=64):
        super(ResNet18, self).__init__()
        self.num_blocks = len(block_list)
        self.block_list = block_list
        self.out_filters = initial_filters
        # Stem: 3x3 conv -> BN -> ReLU.
        self.c1 = Conv2D(self.out_filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.blocks = tf.keras.models.Sequential()
        for block_id in range(len(block_list)):
            for layer_id in range(block_list[block_id]):
                # The first block of every stage after the first downsamples
                # and needs a projection shortcut.
                if block_id != 0 and layer_id == 0:
                    block = ResnetBlock(self.out_filters, strides=2, residual_path=True)
                else:
                    block = ResnetBlock(self.out_filters, residual_path=False)
                self.blocks.add(block)
            # Double the filter count for the next stage.
            self.out_filters *= 2
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.f1 = tf.keras.layers.Dense(10, activation='softmax',
                                        kernel_regularizer=tf.keras.regularizers.l2())

    def call(self, inputs):
        x = self.a1(self.b1(self.c1(inputs)))
        x = self.blocks(x)
        x = self.p1(x)
        return self.f1(x)
# Build, train, checkpoint, and evaluate ResNet18 on CIFAR-10,
# then dump trainable weights and plot the accuracy/loss curves.
model = ResNet18([2, 2, 2, 2])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])

# Resume from an existing checkpoint if one was saved by a previous run.
checkpoint_save_path = "./checkpoint/ResNet18.ckpt"
if os.path.exists(checkpoint_save_path + '.index'):
    print('-------------load the model-----------------')
    model.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path,
                                                 save_weights_only=True,
                                                 save_best_only=True)

history = model.fit(x_train, y_train, batch_size=32, epochs=5,
                    validation_data=(x_test, y_test), validation_freq=1,
                    callbacks=[cp_callback])
model.summary()

# Fix: the original used a bare open()/close() pair, leaking the file handle
# if any write raised; a context manager guarantees the file is closed.
with open('./weights.txt', 'w') as weights_file:
    for v in model.trainable_variables:
        weights_file.write(str(v.name) + '\n')
        weights_file.write(str(v.shape) + '\n')
        weights_file.write(str(v.numpy()) + '\n')

# Plot training/validation accuracy and loss side by side.
acc = history.history['sparse_categorical_accuracy']
val_acc = history.history['val_sparse_categorical_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.subplot(1, 2, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()
3 各知识点代码实现
在不断完善中…
3.1 常用的优化器的代码实现
▲ 优化器的数学原理
3.1.1 SGD
# SGD update: w <- w - lr * grad.
# NOTE(review): illustrative fragment — `epoch`, `train_db`, `w1`, `b1`, `lr`
# and `loss_all` are defined elsewhere in the tutorial; the loop variable
# deliberately shadows the `epoch` hyperparameter, as in the original.
for epoch in range(epoch):
    for step, (x_train, y_train) in enumerate(train_db):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_train, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_train, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        # Vanilla gradient descent step on both parameters.
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
3.1.2 SGDM
# SGD with momentum: m <- beta*m + (1-beta)*grad; w <- w - lr*m.
# NOTE(review): illustrative fragment — `epoch`, `train_db`, `w1`, `b1`, `lr`,
# `loss_all` and the `time` import come from elsewhere in the tutorial.
m_w, m_b = 0, 0
beta = 0.9
now_time = time.time()
for epoch in range(epoch):
    for step, (x_train, y_train) in enumerate(train_db):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_train, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_train, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        # Exponential moving average of the gradients (first moment).
        m_w = beta * m_w + (1 - beta) * grads[0]
        m_b = beta * m_b + (1 - beta) * grads[1]
        w1.assign_sub(lr * m_w)
        b1.assign_sub(lr * m_b)
3.1.3 Adam
# Adam: bias-corrected first (m) and second (v) moment estimates,
# w <- w - lr * m_hat / sqrt(v_hat).
# NOTE(review): illustrative fragment — `epoch`, `train_db`, `w1`, `b1`, `lr`,
# `loss_all` and the `time` import come from elsewhere; no epsilon is added
# to the denominator here (production Adam uses one) — kept as in the original.
m_w, m_b = 0, 0
v_w, v_b = 0, 0
beta1, beta2 = 0.9, 0.999
delta_w, delta_b = 0, 0
global_step = 0
now_time = time.time()
for epoch in range(epoch):
    for step, (x_train, y_train) in enumerate(train_db):
        global_step += 1
        with tf.GradientTape() as tape:
            y = tf.matmul(x_train, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_train, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        # First moment (mean of gradients).
        m_w = beta1 * m_w + (1 - beta1) * grads[0]
        m_b = beta1 * m_b + (1 - beta1) * grads[1]
        # Second moment (mean of squared gradients).
        v_w = beta2 * v_w + (1 - beta2) * tf.square(grads[0])
        v_b = beta2 * v_b + (1 - beta2) * tf.square(grads[1])
        # Bias correction for the moving averages' zero initialization.
        m_w_correction = m_w / (1 - tf.pow(beta1, int(global_step)))
        m_b_correction = m_b / (1 - tf.pow(beta1, int(global_step)))
        v_w_correction = v_w / (1 - tf.pow(beta2, int(global_step)))
        v_b_correction = v_b / (1 - tf.pow(beta2, int(global_step)))
        w1.assign_sub(lr * m_w_correction / tf.sqrt(v_w_correction))
        b1.assign_sub(lr * m_b_correction / tf.sqrt(v_b_correction))
3.1.4 Adagrad
# Adagrad: accumulate squared gradients, w <- w - lr * grad / sqrt(sum(grad^2)).
# NOTE(review): illustrative fragment — outer names (`epoch`, `train_db`, `w1`,
# `b1`, `lr`, `loss_all`, `time`) are defined elsewhere; no epsilon in the
# denominator, as in the original tutorial code.
v_w, v_b = 0, 0
now_time = time.time()
for epoch in range(epoch):
    for step, (x_train, y_train) in enumerate(train_db):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_train, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_train, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        # Monotonically growing sum of squared gradients.
        v_w += tf.square(grads[0])
        v_b += tf.square(grads[1])
        w1.assign_sub(lr * grads[0] / tf.sqrt(v_w))
        b1.assign_sub(lr * grads[1] / tf.sqrt(v_b))
3.1.5 Rmsprop
# RMSProp: exponential moving average of squared gradients,
# w <- w - lr * grad / sqrt(v).
# NOTE(review): illustrative fragment — outer names (`epoch`, `train_db`, `w1`,
# `b1`, `lr`, `loss_all`, `time`) are defined elsewhere; no epsilon in the
# denominator, as in the original tutorial code.
v_w, v_b = 0, 0
beta = 0.9
now_time = time.time()
for epoch in range(epoch):
    for step, (x_train, y_train) in enumerate(train_db):
        with tf.GradientTape() as tape:
            y = tf.matmul(x_train, w1) + b1
            y = tf.nn.softmax(y)
            y_ = tf.one_hot(y_train, depth=3)
            loss = tf.reduce_mean(tf.square(y_ - y))
            loss_all += loss.numpy()
        grads = tape.gradient(loss, [w1, b1])
        # Decaying average of squared gradients (second moment only).
        v_w = beta * v_w + (1 - beta) * tf.square(grads[0])
        v_b = beta * v_b + (1 - beta) * tf.square(grads[1])
        w1.assign_sub(lr * grads[0] / tf.sqrt(v_w))
        b1.assign_sub(lr * grads[1] / tf.sqrt(v_b))
3.2 梯度更新
tape.gradient(误差, 变量)　　opt.apply_gradients(zip(梯度, 变量))
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Fit y = w*x + b to noisy linear data with explicit GradientTape /
# optimizer.apply_gradients calls (manual training loop demo).
TRAIN_STEPS = 20

# Synthetic data: roughly y = 2x + 10 plus Gaussian noise.
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

w = tf.Variable(initial_value=1.0)
b = tf.Variable(initial_value=1.0)

optimizer = tf.keras.optimizers.SGD(0.1)
mse = tf.keras.losses.MeanSquaredError()

print("w:", w.numpy())
print("b:", b.numpy())

for i in range(TRAIN_STEPS):
    print("epoch:", i)
    with tf.GradientTape() as tape:
        logit = w * train_X + b
        loss = mse(train_Y, logit)
    # tape.gradient(target, sources) differentiates the loss w.r.t. [w, b];
    # apply_gradients consumes (gradient, variable) pairs.
    gradients = tape.gradient(target=loss, sources=[w, b])
    optimizer.apply_gradients(zip(gradients, [w, b]))
    print("w:", w.numpy())
    print("w_gradients:", gradients[0].numpy())
    print("b:", b.numpy())
    print("b_gradients:", gradients[1].numpy())

# Scatter the data and overlay the fitted line.
plt.plot(train_X, train_Y, "+")
plt.plot(train_X, w * train_X + b)
plt.show()