阴天，雨天，晴天，你喜欢哪一个（使用神经网络进行模型训练并进行天气图识别和分类）

2023-04-14 | 阅读：次

一、项目目录结构和启动流程

目录结构如下：

├─cloudy
├─lin_predict
├─lin_test
│  ├─_cloudy_
│  ├─_haze_
│  ├─_rainy_
│  ├─_snow_
│  ├─_sunny_
│  └─_thunder_
├─lin_train
│  ├─_cloudy
│  ├─_haze
│  ├─_rainy
│  ├─_snow
│  ├─_sunny
│  └─_thunder
├─rainy
├─haze
└─snow
└─sunny
└─thunder
└─re_name.py
└─resize.py
└─split_test_train.py
└─img_label_and_model.py
└─prediction.py
└─requirements.txt

在snow,cloudy,rainy,sunny,thunder,haze文件夹中分别存放了对应的10000张天气图，这里我使用了6种天气图。

第一步，使用re_name.py对图片进行重命名，将图片命名成0.jpg到9999.jpg。

第二步，使用resize.py对图片进行缩放，比如说将图片缩放成100像素100像素。16G内存的电脑，像素大小为220220大概是训练的极限了。

第三步，使用split_test_train.py将图片分成训练集和测试集，比如说从10000张图片中随机抽取500张作为训练集，100张作为测试集，分别保存在lin_train和lin_test文件夹下面对应的天气文件夹中。

第四步，开始训练模型，使用img_label_and_model.py，这个文件中包含了图片的预处理，图片的标签化，模型的训练，模型的保存，模型的评估等等。

第五步，使用prediction.py对模型进行预测，这里需要预测的图片文件放在lin_predict文件夹下面。

二、代码解析

1、re_name.py

这个代码要运行6次，分别对应6种天气图的重命名。

import os
class BatchRename():
    '''
    批量重命名文件夹中的图片文件
    '''
    def __init__(self):
        # 此处应该为自己存放图片文件夹的路径
        # self.path = "./cloudy"
        # self.path = "./haze"
        # self.path = "./rainy"
        # self.path = "./snow"
        # self.path = "./sunny"
        self.path = "./thunder"

    def rename(self):
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        i = 0
        for item in filelist:
            if item.endswith('.jpg'):
                src = os.path.join(os.path.abspath(self.path), item) # 原图的文件名
                dst = os.path.join(os.path.abspath(self.path), str(i) + '.jpg') # 新图的文件名字
                try:
                    os.rename(src, dst)
                    print('converting %s to %s ...' % (src, dst))
                    i = i + 1
                except:
                    continue
        print ('total %d to rename & converted %d jpgs' % (total_num, i))

if __name__ == '__main__':
    demo = BatchRename()
    demo.rename()

2、resize.py

# 1.数据清洗
import os
from PIL import Image


def read_image(paths):
    os.listdir(paths)
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist
def im_xiangsu(paths):
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))

            newim.save( filename[:-4]+ '-convert.jpg')
            # delete the old file
            os.remove(filename)
            print('图片' + filename[:-4] + '像素转化完成')
        except OSError as e:
           print(e.args)


if __name__ == '__main__':
    test_path = './'
    filelist=read_image(test_path)
    print(filelist)
    im_xiangsu(filelist)

3、split_test_train.py

这个代码要运行流程要修改self.path = "./haze"和demo1.selectFile(path6,0.05)以及demo2.selectFile(test_path6,0.01)。

其中，haze对应6，thunder对应5，snow对应4，cloud对应3，sunny对应2，rainy对应1。

## 由于训练的数据太多，所以需要将数据分成训练集和测试集
import os
import shutil
import random

# train_path
path1 = r'./lin_train/_rainy'
path2 = r'./lin_train/_sunny'
path3 = r'./lin_train/_cloudy'
path4 = r'./lin_train/_snow'
path5 = r'./lin_train/_thunder'
path6 = r'./lin_train/_haze'

# test_path
test_path1 = './lin_test/_rainy_'
test_path2 = './lin_test/_sunny_'
test_path3 = './lin_test/_cloudy_'
test_path4 = './lin_test/_snow_'
test_path5 = './lin_test/_thunder_'
test_path6 = './lin_test/_haze_'


class BatchRename():
    '''
    批量重命名文件夹中的图片文件
    '''
    def __init__(self):
        # 此处应该为自己存放图片文件夹的路径
        # self.path = "./rainy"
        # self.path = r"./sunny"
        # self.path = r"./cloudy"
        # self.path = "./snow"
        # self.path = r"./thunder"
        self.path = "./haze"

    def selectFile(self, path, rate):
        # 从path中随机选取rate比例的文件
        pathDir = os.listdir(self.path)
        filenumber = len(pathDir)
        picknumber = int(filenumber * rate)
        #
        sample = random.sample(pathDir, picknumber)
        print(sample)
        #  # clear all the file in "./lin_test/_thunder_"
        for file in os.listdir(path):
            os.remove(os.path.join(path, file))
        for name in sample:
            origin = os.path.join(self.path, name)
            #print(origin)
            # copy file "./thunder" to "./lin_test/_thunder_" and "./lin_train/_thunder"
            target = os.path.join(path, name)
            shutil.copy2(origin, target)
        print("move done!")
        return


# sunny
demo1 = BatchRename()
demo1.selectFile(path6,0.05)
demo2 = BatchRename()
demo2.selectFile(test_path6,0.01)

4、img_label_and_model.py

import os
from PIL import Image
import numpy as np
import keras
from keras import datasets,layers,models


def im_array(paths):
    '''
    将图片转化为数组
    :param paths: 读入图片的路径
    :return:
    M: 图片的数组,shape为(图片数量,图片的长,图片的宽,图片的通道数)
    '''
    M = []
    for filename in paths:
        im = Image.open(filename)
        im_L = im.convert("L")  # 模式L
        Core = im_L.getdata() # 获取图像像素值
        arr1 = np.array(Core, dtype='float32') / 255.0 # 将像素值转化为0-1之间的数
        list_img = arr1.tolist() # 将数组转化为列表,方便后面的reshape,
        M.extend(list_img) # 将列表中的元素添加到M中
    return M


def read_image(paths):
    os.listdir(paths)
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist


path1 = './lin_train/_rainy'
path2 = './lin_train/_sunny'
path3 = './lin_train/_cloudy'
path4 = './lin_train/_snow'
path5 = './lin_train/_thunder'
path6 = './lin_train/_haze'
filelist_1 = read_image(path1)
filelist_2 = read_image(path2)
filelist_3 = read_image(path3)
filelist_4 = read_image(path4)
filelist_5 = read_image(path5)
filelist_6 = read_image(path6)
filelist_all = filelist_1 + filelist_2+filelist_3+filelist_4+filelist_5+filelist_6

M = []
M = im_array(filelist_all) # 将图片转化为数组

dict_label={0:'雨天',1:'晴天',2:'多云',3:'雪天',4:'雷天',5:'雾天'}
train_images=np.array(M).reshape(len(filelist_all),128,128)
label=[0]*len(filelist_1)+[1]*len(filelist_2)+[2]*len(filelist_3)+[3]*len(filelist_4)+[4]*len(filelist_5)+[5]*len(filelist_6)
train_lables=np.array(label)        #数据标签
train_images = train_images[ ..., np.newaxis ]        #数据图片

test_path1 = './lin_test/_rainy_'
test_path2 = './lin_test/_sunny_'
test_path3 = './lin_test/_cloudy_'
test_path4 = './lin_test/_snow_'
test_path5 = './lin_test/_thunder_'
test_path6 = './lin_test/_haze_'

test_filelist_1 = read_image(test_path1)
test_filelist_2 = read_image(test_path2)
test_filelist_3 = read_image(test_path3)
test_filelist_4 = read_image(test_path4)
test_filelist_5 = read_image(test_path5)
test_filelist_6 = read_image(test_path6)
filelist_tot = test_filelist_1+ test_filelist_2+test_filelist_3+test_filelist_4+test_filelist_5+test_filelist_6

N = []
N = im_array(filelist_tot)

dict_label={0:'雨天',1:'晴天',2:'多云',3:'雪天',4:'雷天',5:'雾天'}
test_images=np.array(N).reshape(len(filelist_tot),128,128)
label=[0]*len(test_filelist_1)+[1]*len(test_filelist_2)+[2]*len(test_filelist_3)+[3]*len(test_filelist_4)+[4]*len(test_filelist_5)+[5]*len(test_filelist_6)
test_lables=np.array(label)        #数据标签
test_images = test_images[ ..., np.newaxis ]        #数据图片,...表示前面的维度不变,np.newaxis表示增加一个维度

# 进入神经网络
model = models.Sequential()
# Conv2D表示卷积层，activation表示激活函数
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)))
# MaxPooling2D表示最大池化层
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Flatten表示展平层，将多维张量展平为一维向量
model.add(layers.Flatten())

# Dense表示全连接层，
# 这两行代码分别添加了两个全连接层，第一个全连接层包含64个神经元，激活函数为ReLU；第二个全连接层包含2个神经元，激活函数为softmax，用于输出模型的分类结果。
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(6, activation='softmax'))
model.summary()  # 显示模型的架构
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#epochs为训练多少轮、batch_size为每次训练多少个样本
model.fit(train_images, train_lables, epochs=1000)

print(model.evaluate(test_images,test_lables)) # 这里的test_images和test_lables是测试集的数据和标签

# save model to h5
model.save('model_epochs=1000.h5')

5、predict.py

import os
from PIL import Image
import numpy as np
# load model from h5
from tensorflow import keras
model = keras.models.load_model('./model_epochs=1000.h5')
#model.summary()


def read_image(paths):
    os.listdir(paths)
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist


def im_xiangsu(paths):
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))

            newim.save( filename[:-4]+ '-convert.jpg')
            # delete the old file
            os.remove(filename)
            print('图片' + filename[:-4] + '像素转化完成')
        except OSError as e:
           print(e.args)
def im_s_array(paths,i):
    im = Image.open(paths[i]) # 读取图片 [0]是为了读取第一张图片
    im_L = im.convert("L")  # 模式L
    Core = im_L.getdata()
    arr1 = np.array(Core, dtype='float32') / 255.0
    list_img = arr1.tolist()
    images = np.array(list_img).reshape(-1, 128, 128, 1)
    return images

# load image
test = './lin_predict'  # 你要测试的图片的路径
filelist = read_image(test)
im_xiangsu(filelist)
filelist = read_image(test)


# 依次读入filelist中的图片
for i in range(len(filelist)):

    img = im_s_array(filelist,i)

    # 预测图像
    dict_label={0:'雨天',1:'晴天',2:'多云',3:'雪天',4:'雷天',5:'雾天'}
    predictions_single = model.predict(img)
    print(filelist[i],"的预测结果为:", dict_label[np.argmax(predictions_single)])
    # 这里返回数组中概率最大的那个
    print(predictions_single)