一、Project directory structure and workflow
The directory structure is as follows:
├─cloudy
├─haze
├─rainy
├─snow
├─sunny
├─thunder
├─lin_predict
├─lin_test
│ ├─_cloudy_
│ ├─_haze_
│ ├─_rainy_
│ ├─_snow_
│ ├─_sunny_
│ └─_thunder_
├─lin_train
│ ├─_cloudy
│ ├─_haze
│ ├─_rainy
│ ├─_snow
│ ├─_sunny
│ └─_thunder
├─re_name.py
├─resize.py
├─split_test_train.py
├─img_label_and_model.py
├─prediction.py
└─requirements.txt
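The contents of requirements.txt are not shown in this post; judging only from the imports used in the scripts below, it would need at least something like the following (the package list is my assumption, versions left unpinned):
# requirements.txt (assumed from the imports below)
tensorflow
numpy
Pillow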
The folders snow, cloudy, rainy, sunny, thunder and haze each hold 10,000 images of the corresponding weather type; six weather types are used here.
Step 1: run re_name.py to rename the images in each folder to 0.jpg through 9999.jpg.
Step 2: run resize.py to shrink the images, for example to 100×100 pixels (the scripts below use 128×128). On a machine with 16 GB of RAM, around 220×220 is about the practical limit for training; a rough memory estimate follows this list.
Step 3: run split_test_train.py to split the images into a training set and a test set, for example randomly drawing 500 of the 10,000 images per class for training and 100 for testing, saved into the matching weather folders under lin_train and lin_test.
Step 4: train the model with img_label_and_model.py, which handles image preprocessing, labelling, model training, saving and evaluation.
Step 5: run prediction.py to classify new images; put the images you want predicted into the lin_predict folder.
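Step 2's "16 GB / about 220×220" remark can be sanity-checked with a rough, hedged calculation (the 32-bytes-per-float figure for the intermediate Python list is an approximation, not a measurement):
# Rough memory check for step 2 (illustrative numbers only):
# im_array below first builds a plain Python list of floats before the numpy array,
# and a CPython float costs roughly 24 bytes plus an 8-byte list slot.
n_images = 500 * 6                        # 500 training images per class, 6 classes
for side in (100, 128, 220):
    pixels = n_images * side * side
    numpy_gb = pixels * 4 / 1024 ** 3     # final float32 numpy array
    list_gb = pixels * 32 / 1024 ** 3     # intermediate Python list (rough estimate)
    print('%dx%d: array ~%.2f GB, intermediate list ~%.2f GB' % (side, side, numpy_gb, list_gb))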
二、Code walkthrough
1、re_name.py
This script is run six times, once per weather folder (a single-run alternative is sketched after the listing).
import os

class BatchRename():
    '''
    Batch-rename the image files in a folder.
    '''
    def __init__(self):
        # set this to the folder that holds your images
        # self.path = "./cloudy"
        # self.path = "./haze"
        # self.path = "./rainy"
        # self.path = "./snow"
        # self.path = "./sunny"
        self.path = "./thunder"

    def rename(self):
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        i = 0
        for item in filelist:
            if item.endswith('.jpg'):
                src = os.path.join(os.path.abspath(self.path), item)             # original file name
                dst = os.path.join(os.path.abspath(self.path), str(i) + '.jpg')  # new file name
                try:
                    os.rename(src, dst)
                    print('converting %s to %s ...' % (src, dst))
                    i = i + 1
                except OSError:
                    continue
        print('total %d to rename & converted %d jpgs' % (total_num, i))

if __name__ == '__main__':
    demo = BatchRename()
    demo.rename()
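Instead of editing self.path and running re_name.py six times, the same class could be driven in a loop; a minimal sketch, assuming the six folders sit next to the script:
if __name__ == '__main__':
    # hypothetical driver: rename all six weather folders in one run
    for folder in ('./cloudy', './haze', './rainy', './snow', './sunny', './thunder'):
        demo = BatchRename()
        demo.path = folder   # override the hard-coded path
        demo.rename()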
2、resize.py
# 1. data cleaning: collect every .jpg and rescale it to 128x128
import os
from PIL import Image

def read_image(paths):
    os.listdir(paths)  # raises early if the folder is missing
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist

def im_xiangsu(paths):
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))
            newim.save(filename[:-4] + '-convert.jpg')  # save the resized copy
            os.remove(filename)                         # delete the old file
            print('image ' + filename[:-4] + ' resized')
        except OSError as e:
            print(e.args)

if __name__ == '__main__':
    test_path = './'
    filelist = read_image(test_path)
    print(filelist)
    im_xiangsu(filelist)
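One caveat: read_image('./') walks every subdirectory, so once lin_train and lin_test have been filled, re-running resize.py would also rename their copies to *-convert.jpg. A hedged variant that only touches the six source folders:
if __name__ == '__main__':
    # hypothetical variant: resize only the raw weather folders
    for folder in ('./cloudy', './haze', './rainy', './snow', './sunny', './thunder'):
        im_xiangsu(read_image(folder))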
3、split_test_train.py
To adapt this script to each class you edit three lines: self.path = "./haze", demo1.selectFile(path6, 0.05) and demo2.selectFile(test_path6, 0.01). The numbering is: haze is 6, thunder is 5, snow is 4, cloudy is 3, sunny is 2, rainy is 1. (A single-run alternative is sketched after the listing.)
# The data set is large, so split it into a training set and a test set
import os
import shutil
import random

# training-set target folders
path1 = r'./lin_train/_rainy'
path2 = r'./lin_train/_sunny'
path3 = r'./lin_train/_cloudy'
path4 = r'./lin_train/_snow'
path5 = r'./lin_train/_thunder'
path6 = r'./lin_train/_haze'
# test-set target folders
test_path1 = './lin_test/_rainy_'
test_path2 = './lin_test/_sunny_'
test_path3 = './lin_test/_cloudy_'
test_path4 = './lin_test/_snow_'
test_path5 = './lin_test/_thunder_'
test_path6 = './lin_test/_haze_'

class BatchRename():
    '''
    Randomly sample image files from a source folder into a target folder.
    '''
    def __init__(self):
        # set this to the folder that holds the source images
        # self.path = "./rainy"
        # self.path = r"./sunny"
        # self.path = r"./cloudy"
        # self.path = "./snow"
        # self.path = r"./thunder"
        self.path = "./haze"

    def selectFile(self, path, rate):
        # randomly pick a fraction `rate` of the files in self.path
        pathDir = os.listdir(self.path)
        filenumber = len(pathDir)
        picknumber = int(filenumber * rate)
        sample = random.sample(pathDir, picknumber)
        print(sample)
        # clear everything already in the target folder, e.g. "./lin_test/_haze_"
        for file in os.listdir(path):
            os.remove(os.path.join(path, file))
        # copy the sampled files, e.g. from "./haze" to "./lin_train/_haze" or "./lin_test/_haze_"
        for name in sample:
            origin = os.path.join(self.path, name)
            # print(origin)
            target = os.path.join(path, name)
            shutil.copy2(origin, target)
        print("move done!")
        return

# haze: 5% of the images into the training folder, 1% into the test folder
demo1 = BatchRename()
demo1.selectFile(path6, 0.05)
demo2 = BatchRename()
demo2.selectFile(test_path6, 0.01)
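The three manual edits per class can also be replaced by a loop over (source, train, test) triples; a sketch with the same 5% / 1% split, reusing the paths and class defined above:
# hypothetical driver: split every class in one run
splits = [('./rainy', path1, test_path1), ('./sunny', path2, test_path2),
          ('./cloudy', path3, test_path3), ('./snow', path4, test_path4),
          ('./thunder', path5, test_path5), ('./haze', path6, test_path6)]
for source, train_dir, test_dir in splits:
    demo = BatchRename()
    demo.path = source
    demo.selectFile(train_dir, 0.05)  # 5% of the images -> training set
    demo.selectFile(test_dir, 0.01)   # 1% -> test set (drawn independently, so it may overlap the training sample)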
4、img_label_and_model.py
import os
from PIL import Image
import numpy as np
from tensorflow.keras import layers, models

def im_array(paths):
    '''
    Convert a list of image files to a flat list of pixel values.
    :param paths: list of image file paths
    :return: M, a flat list of normalised grey values; it is reshaped into
             (number of images, height, width) further down.
    '''
    M = []
    for filename in paths:
        im = Image.open(filename)
        im_L = im.convert("L")                          # greyscale mode "L"
        Core = im_L.getdata()                           # raw pixel values
        arr1 = np.array(Core, dtype='float32') / 255.0  # scale pixels to the 0-1 range
        list_img = arr1.tolist()                        # back to a plain list for the later reshape
        M.extend(list_img)                              # append this image's pixels to M
    return M

def read_image(paths):
    os.listdir(paths)  # raises early if the folder is missing
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist
path1 = './lin_train/_rainy'
path2 = './lin_train/_sunny'
path3 = './lin_train/_cloudy'
path4 = './lin_train/_snow'
path5 = './lin_train/_thunder'
path6 = './lin_train/_haze'
filelist_1 = read_image(path1)
filelist_2 = read_image(path2)
filelist_3 = read_image(path3)
filelist_4 = read_image(path4)
filelist_5 = read_image(path5)
filelist_6 = read_image(path6)
filelist_all = filelist_1 + filelist_2+filelist_3+filelist_4+filelist_5+filelist_6
M = im_array(filelist_all)  # convert every training image to pixel values
dict_label = {0: 'rainy', 1: 'sunny', 2: 'cloudy', 3: 'snow', 4: 'thunder', 5: 'haze'}
train_images = np.array(M).reshape(len(filelist_all), 128, 128)
label = [0]*len(filelist_1) + [1]*len(filelist_2) + [2]*len(filelist_3) + [3]*len(filelist_4) + [4]*len(filelist_5) + [5]*len(filelist_6)
train_lables = np.array(label)                # training labels
train_images = train_images[..., np.newaxis]  # add a channel axis -> (N, 128, 128, 1)
test_path1 = './lin_test/_rainy_'
test_path2 = './lin_test/_sunny_'
test_path3 = './lin_test/_cloudy_'
test_path4 = './lin_test/_snow_'
test_path5 = './lin_test/_thunder_'
test_path6 = './lin_test/_haze_'
test_filelist_1 = read_image(test_path1)
test_filelist_2 = read_image(test_path2)
test_filelist_3 = read_image(test_path3)
test_filelist_4 = read_image(test_path4)
test_filelist_5 = read_image(test_path5)
test_filelist_6 = read_image(test_path6)
filelist_tot = test_filelist_1+ test_filelist_2+test_filelist_3+test_filelist_4+test_filelist_5+test_filelist_6
N = im_array(filelist_tot)  # convert every test image to pixel values
test_images = np.array(N).reshape(len(filelist_tot), 128, 128)
label = [0]*len(test_filelist_1) + [1]*len(test_filelist_2) + [2]*len(test_filelist_3) + [3]*len(test_filelist_4) + [4]*len(test_filelist_5) + [5]*len(test_filelist_6)
test_lables = np.array(label)               # test labels
test_images = test_images[..., np.newaxis]  # add a channel axis; "..." keeps the leading dimensions unchanged
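# (Sanity check, assuming 500 training and 100 test images per class as in step 3:
#  train_images should be (3000, 128, 128, 1) and train_lables (3000,),
#  test_images (600, 128, 128, 1) and test_lables (600,).)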
# Build the network
model = models.Sequential()
# Conv2D = convolution layer; activation = activation function
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)))
# MaxPooling2D = max-pooling layer
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# Flatten the feature maps into a single vector
model.add(layers.Flatten())
# Two dense (fully connected) layers: 64 units with ReLU, then 6 units with
# softmax that output the class probabilities
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(6, activation='softmax'))
model.summary()  # print the model architecture
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# epochs = number of passes over the training data; batch_size is not set here,
# so Keras uses its default of 32
model.fit(train_images, train_lables, epochs=1000)
print(model.evaluate(test_images, test_lables))  # loss and accuracy on the test set
# save the trained model to an HDF5 file
model.save('model_epochs=1000.h5')
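The call above only sets epochs, so Keras falls back to its default batch size of 32 and never sees the test split during training. A hedged alternative call (50 epochs is an arbitrary example, not a tuned value):
# alternative fit call (sketch): explicit batch size plus validation on the test split
model.fit(train_images, train_lables,
          epochs=50,
          batch_size=32,
          validation_data=(test_images, test_lables))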
5、prediction.py
import os
from PIL import Image
import numpy as np
from tensorflow import keras

# load the trained model from the HDF5 file
model = keras.models.load_model('./model_epochs=1000.h5')
# model.summary()

def read_image(paths):
    os.listdir(paths)  # raises early if the folder is missing
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist

def im_xiangsu(paths):
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))
            newim.save(filename[:-4] + '-convert.jpg')  # save the resized copy
            os.remove(filename)                         # delete the old file
            print('image ' + filename[:-4] + ' resized')
        except OSError as e:
            print(e.args)

def im_s_array(paths, i):
    im = Image.open(paths[i])                           # read the i-th image
    im_L = im.convert("L")                              # greyscale mode "L"
    Core = im_L.getdata()
    arr1 = np.array(Core, dtype='float32') / 255.0
    list_img = arr1.tolist()
    images = np.array(list_img).reshape(-1, 128, 128, 1)
    return images

# load the images to be predicted
test = './lin_predict'       # folder with the images you want to classify
filelist = read_image(test)
im_xiangsu(filelist)         # resize them to 128x128 first
filelist = read_image(test)  # re-read, because the files were renamed to *-convert.jpg
# predict each image in filelist in turn
dict_label = {0: 'rainy', 1: 'sunny', 2: 'cloudy', 3: 'snow', 4: 'thunder', 5: 'haze'}
for i in range(len(filelist)):
    img = im_s_array(filelist, i)
    predictions_single = model.predict(img)
    # take the class with the highest probability
    print(filelist[i], "is predicted as:", dict_label[np.argmax(predictions_single)])
    print(predictions_single)
6、Results
This silly model cannot tell thunder from rain, and sometimes it also mixes up sunny, cloudy and rainy; a confusion-matrix sketch for checking this follows below.
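To see exactly which classes are being confused, a confusion matrix helps; a minimal sketch, assuming the test arrays from img_label_and_model.py are available (or rebuilt the same way) alongside the saved model:
import numpy as np
from tensorflow import keras

# sketch: rows are true labels, columns are predicted labels (0 rainy ... 5 haze)
model = keras.models.load_model('./model_epochs=1000.h5')
pred = np.argmax(model.predict(test_images), axis=1)  # test_images / test_lables built as in img_label_and_model.py
matrix = np.zeros((6, 6), dtype=int)
for true, guess in zip(test_lables, pred):
    matrix[true, guess] += 1
print(matrix)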