building_with_espdl-1-success

building_with_espdl-1-success

基本完成原文第1部分。

1
2
3
4
5
6
7
8
9
10
11
12
# Import necessary libraries
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.utils import to_categorical
import numpy as np
import pandas as pd

# Load the dataset (you may need to download it manually from Kaggle and upload to Colab)
# Assuming you have a folder 'gestures' with images sorted in subdirectories
data_dir = "./leapgestrecog/leapGestRecog" # edit this: path to the gesture dataset root

处理数据集部分

无用1

这一段代码无需理会,因为在和gpt交流的过程中,明白了pkl也只是原作者切割并且整理好的数据集,我这里可以完全不采用,并且原作中导入数据集的方法较为繁琐,此处也有更简单的方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import numpy as np
from sklearn.model_selection import train_test_split
from PIL import Image
from keras.preprocessing.image import img_to_array

# Dataset root directory.
data_dir = "./leapgestrecog/leapGestRecog"  # replace with your actual path

# Class-label mapping keyed on the second-level directory names of the dataset.
class_labels = {
    '01_palm': 0,
    '02_l': 1,
    '03_fist': 2,
    '04_fist_moved': 3,
    '05_thumb': 4,
    '06_index': 5,
    '07_ok': 6,
    '08_palm_moved': 7,
    '09_c': 8,
    '10_down': 9
}

# Collect every image path together with its integer label.
image_paths = []
labels = []

# First level: one folder per recording subject.
for folder in os.listdir(data_dir):
    folder_path = os.path.join(data_dir, folder)
    if os.path.isdir(folder_path):  # skip stray non-directory entries
        # Second level: one folder per gesture class.
        for class_name, label in class_labels.items():
            class_folder_path = os.path.join(folder_path, class_name)
            if os.path.isdir(class_folder_path):
                # Third level: the image files themselves.
                for img_file in os.listdir(class_folder_path):
                    img_path = os.path.join(class_folder_path, img_file)
                    image_paths.append(img_path)
                    labels.append(label)

# Target image size fed to the CNN.
img_height, img_width = 96, 96

# Load each image, resize, and scale pixel values into [0, 1].
images = []
for path in image_paths:
    try:
        img = Image.open(path).convert('RGB')
        img = img.resize((img_width, img_height))
        img_array = img_to_array(img) / 255.0  # normalize
        images.append(img_array)
    except Exception as e:
        # Best-effort: report unreadable files and keep going.
        print(f"无法处理图像 {path}: {e}")

# Convert to NumPy arrays.
X = np.array(images)
y = np.array(labels)

# Split into train / test / calibration sets:
# 70% train, then the held-out 30% is halved into test and calibration.
ts = 0.3
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=ts, random_state=42)
X_test, X_cal, y_test, y_cal = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Sanity-check the split sizes.
print("训练集样本数:", X_train.shape[0])
print("测试集样本数:", X_test.shape[0])
print("校准集样本数:", X_cal.shape[0])

简化的切割、标注、导入数据集的方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Image geometry and batch size (96x96 as recommended by the ESP-DL docs).
img_height, img_width = 96, 96
batch_size = 16

# Training split: 80% of the images; seed makes the split reproducible.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# Matching 20% validation split (same seed keeps the two subsets disjoint).
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

# Cache, shuffle and prefetch so the input pipeline does not stall training.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

像素值必须归一化,否则无法收敛

1
2
3
4
5
6
7
8
# Normalization step: pixel values must be scaled or training will not converge.
def normalize_img(image, label):
    """Scale uint8 pixel values into [0, 1] as float32; labels pass through unchanged."""
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# Apply the normalization to both dataset pipelines.
train_ds = train_ds.map(normalize_img)
val_ds = val_ds.map(normalize_img)

无用2,一开始是为了检查gpt声称简化的切割方法和原作者的方法得到的样本量差距大不大,若不大则可以使用简化方法

1
2
3
4
5
6
7
8
9
10
11
12
# 检查样本数是否一致
print("使用 image_dataset_from_directory 分割的数据集:")
train_count = tf.data.experimental.cardinality(train_ds).numpy() * batch_size
val_count = tf.data.experimental.cardinality(val_ds).numpy() * batch_size
print(f"训练集样本数: {train_count}")
print(f"验证集样本数: {val_count}")

print("使用 train_test_split 分割的数据集:")
print(f"训练集样本数: {len(X_train)}")
print(f"测试集样本数: {len(X_test)}")
print(f"校准集样本数: {len(X_cal)}")

cnn算法部分

*必须注意这里原作者只抽取了kaggle数据集中的6种,但我用了全部10种,一开始也直接复制代码写的Dense(6,但实际上应该是Dense(10

这一段是由gpt给出的版本,和原作者类似,属于链式调用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Three conv/pool/dropout stages followed by a dense classifier head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(96, 96, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
# All ten gesture classes of the Kaggle set are used here (not six).
model.add(Dense(10, activation='softmax'))

无用3,这是原作者部分,属于逐层添加

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Same CNN expressed as a single layer list:
# three conv/pool/dropout stages, then a dense classifier head.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(96, 96, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),  # ten gesture classes
])

这部分可以去掉,但输出结构摘要可以与原作者的过程相对比

这里要强调的是原作者用的是96,96,1的灰度图(最后一个数字表示通道数),而本文采用的是96,96,3的三通道的彩色图,会使参数多一些,但影响不大。

1
2
3
# Print the layer-by-layer structure summary (useful for comparing
# against the original author's grayscale (96, 96, 1) variant).
model.summary()

模型的编译部分

这一段是我采用的模型编译部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Compile: Adam optimizer + sparse categorical cross-entropy (integer labels).
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Number of training epochs.
epochs = 5

# Train on the tf.data pipelines built earlier.
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    verbose=1,
)

无用4,这一段是修改原作者的

因为从导入数据集那里就产生了区别,我在导入的过程中并没有产生X_train、y_train这类变量,因此也不使用这段代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Compile the model (original author's variant: trains on NumPy arrays).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training hyperparameters.
epochs = 10 # number of epochs; adjust as needed
batch_size = 32 # samples per batch

# Train using the array-based train/test split.
# NOTE(review): the test split is used as validation data here — there is no
# separate validation set in this variant.
history = model.fit(
X_train, y_train, # training data
epochs=epochs,
batch_size=batch_size,
validation_data=(X_test, y_test), # validation data
verbose=1
)

排错部分,分别从不同可能导致精确度提升不上去的角度进行排查

1
2
3
4
5
6
7
8
9
# Debug probe: inspect the label dtype and a few sample labels from each split
# (labels should be integer class indices for sparse categorical cross-entropy).
for images, labels in train_ds.take(1):
    print("训练集标签数据类型:", labels.dtype)
    print("训练集标签示例:", labels.numpy()[:10])

for images, labels in val_ds.take(1):
    print("验证集标签数据类型:", labels.dtype)
    print("验证集标签示例:", labels.numpy()[:10])

1
2
3
4
5
6
7
8
9
# Debug probe: check the pixel value range of one batch per split
# (should be within [0, 1] after normalization).
for images, labels in train_ds.take(1):
    print("训练集图像数据最小值:", images.numpy().min())
    print("训练集图像数据最大值:", images.numpy().max())

for images, labels in val_ds.take(1):
    print("验证集图像数据最小值:", images.numpy().min())
    print("验证集图像数据最大值:", images.numpy().max())

1
2
3
4
# Debug probe: print the dataset objects (shows element spec / batching).
print("训练集数据集对象:", train_ds)
print("验证集数据集对象:", val_ds)

1
2
3
4
5
6
7
# Debug probe: after normalization, check one batch for NaN or Inf values
# in both images and labels (either would stall training).
for images, labels in train_ds.take(1):
    print("训练集图像中是否存在 NaN:", np.isnan(images.numpy()).any())
    print("训练集图像中是否存在 Inf:", np.isinf(images.numpy()).any())
    print("训练集标签中是否存在 NaN:", np.isnan(labels.numpy()).any())
    print("训练集标签中是否存在 Inf:", np.isinf(labels.numpy()).any())

查看训练过程的历史记录,但我只在失败后用过,成功后还没有运行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import matplotlib.pyplot as plt

# Plot the training curves if a training history exists.
if history.history:
    # Accuracy: training vs validation.
    plt.plot(history.history['accuracy'], label='训练准确率')
    plt.plot(history.history['val_accuracy'], label='验证准确率')
    plt.xlabel('训练轮次')
    plt.ylabel('准确率')
    plt.legend()
    plt.show()

    # Loss: training vs validation.
    plt.plot(history.history['loss'], label='训练损失')
    plt.plot(history.history['val_loss'], label='验证损失')
    plt.xlabel('训练轮次')
    plt.ylabel('损失')
    plt.legend()
    plt.show()
else:
    print("没有历史记录,可能训练过程中出现错误。")

自我完整代码

初版

唯一做的微调是batch_size和学习率。学习率目前在我看来对我影响不大,一开始accuracy上不去的原因一个是Dense设置不对,还有一个是没有归一化。batch_size的影响在下一节提到。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Image geometry and batch size.
img_height, img_width = 96, 96
batch_size = 16  # small batch to fit within 4 GB of GPU memory

# Dataset root directory.
data_dir = "./leapgestrecog/leapGestRecog"  # replace with your actual path

# Build the training/validation splits (80/20, reproducible via seed).
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

# Cache and prefetch to speed up training.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Pixel normalization to [0, 1] — without this the model fails to converge.
def normalize_img(image, label):
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

train_ds = train_ds.map(normalize_img)
val_ds = val_ds.map(normalize_img)

# CNN: three conv/pool/dropout stages followed by a dense classifier head.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(96, 96, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax')  # all ten gesture classes
])

# Compile with a small learning rate for numerical stability.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train.
epochs = 10
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    verbose=1
)

batch_size影响,据我观察很小

为8时:

为16时:

为32时:

有一说一至少我看区别不大……

次版

后来esp-dl无法完成优化和量化部分,怀疑原因是

  • 输入数据集的形状不一样,是3通道的彩色图导致,在次版中改为1通道的灰度图
  • 原文只采用了kaggle中10种手势的6种,而我初版使用了全部种类,这里也削减为前6种
    最后发现都不是
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    import os
    import cv2
    import numpy as np
    import pickle
    from sklearn.model_selection import train_test_split
    import tensorflow as tf
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
    import tf2onnx

    # 数据集路径
    data_dir = "./leapGestRecog" # 新的根路径

    # 设置图片的尺寸和批量大小
    img_height, img_width = 96, 96
    batch_size = 16

    # 定义只处理前6个手势文件夹
    allowed_folders = ['01_palm', '02_l', '03_fist', '04_fist_moved', '05_thumb', '06_index']

    # 读取图像数据和标签
    X = []
    y = []
    label_map = {folder: idx for idx, folder in enumerate(allowed_folders)} # 创建标签映射

    for main_folder in sorted(os.listdir(data_dir)):
    main_folder_path = os.path.join(data_dir, main_folder)
    if os.path.isdir(main_folder_path):
    for folder_name in allowed_folders: # 仅处理前6个手势文件夹
    folder_path = os.path.join(main_folder_path, folder_name)
    if os.path.isdir(folder_path):
    label = label_map[folder_name] # 使用映射中的标签
    for img_name in os.listdir(folder_path):
    img_path = os.path.join(folder_path, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is not None:
    img = cv2.resize(img, (img_width, img_height))
    X.append(img)
    y.append(label)

    # 将数据转换为 NumPy 数组并规范化
    X = np.array(X).reshape(-1, img_height, img_width, 1) / 255.0 # 归一化
    y = np.array(y)

    # 划分数据集
    ts = 0.3 # 测试集比例
    X_train, X_test1, y_train, y_test1 = train_test_split(X, y, test_size=ts, random_state=42)
    X_test, X_cal, y_test, y_cal = train_test_split(X_test1, y_test1, test_size=ts, random_state=42)

    # 保存校准数据集为 pickle 文件
    with open('X_test.pkl', 'wb') as file:
    pickle.dump(X_test, file)

    with open('y_test.pkl', 'wb') as file:
    pickle.dump(y_test, file)

    with open('X_train.pkl', 'wb') as file:
    pickle.dump(X_train, file)

    with open('y_train.pkl', 'wb') as file:
    pickle.dump(y_train, file)

    # 打印 TensorFlow 版本
    print(tf.__version__)

    # 定义模型结构
    model = Sequential()
    model.add(Conv2D(32, (5, 5), activation='relu', input_shape=(96, 96, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(6, activation='softmax')) # 修改为6个分类

    # 编译模型
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # 训练模型
    history = model.fit(X_train, y_train, epochs=5, batch_size=64, verbose=1, validation_data=(X_test, y_test))

    # 保存模型为 .h5 格式
    model.save('handrecognition_model.h5')

    # 加载保存的模型
    model = tf.keras.models.load_model("handrecognition_model.h5")

    # 将模型转换为 SavedModel 格式,准备转换为 ONNX
    tf.saved_model.save(model, "tmp_model")

    # 使用 tf2onnx 将 SavedModel 格式模型转换为 ONNX 格式
    !python -m tf2onnx.convert --saved-model "tmp_model" --output "handrecognition_model.onnx"

后续保存和转化模型

保存模型

1
# Save the trained model in Keras HDF5 format.
model.save('handrecognition_model.h5')

转化模型

1
2
3
4
# Reload the HDF5 model and re-export it in SavedModel format for conversion.
model = tf.keras.models.load_model("handrecognition_model.h5")
tf.saved_model.save(model, "tmp_model")
import tf2onnx
# Notebook shell command: run the tf2onnx CLI to produce the ONNX model.
!python -m tf2onnx.convert --saved-model "tmp_model" --output "handrecognition_model.onnx"

和google.colab结合(我还目前用不到,所以未尝试)

1
2
# Step 3: zip the SavedModel directory (optional; notebook shell command)
!zip -r handrecognition_model.zip tmp_model

这是压缩成功的意思

  adding: tmp_model/ (stored 0%)
  adding: tmp_model/variables/ (stored 0%)
  adding: tmp_model/variables/variables.data-00000-of-00001 (deflated 10%)
  adding: tmp_model/variables/variables.index (deflated 64%)
  adding: tmp_model/fingerprint.pb (stored 0%)
  adding: tmp_model/assets/ (stored 0%)
  adding: tmp_model/saved_model.pb (deflated 89%)
1
2
3
4
5
# Step 4: download the model files (Google Colab only)
from google.colab import files
files.download("handrecognition_model.h5")
files.download("handrecognition_model.onnx")
files.download("handrecognition_model.zip")

netron可视化

初版模型可视化:

次版模型可视化:

原版模型可视化:


building_with_espdl-1-success
https://blakehansen130.github.io/2024/11/02/chatgpt_version1/
发布于
2024年11月2日
许可协议