影像辨識
(Modified from https://github.com/yenlung/Deep-Learning-Basics) ##
利用ResNet製作影像辨識 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from urllib.request import urlretrieve
# Download a class-name list and nine sample photos, then let a default
# ResNet50 classify each photo and display the picture with its predicted label.
urlretrieve("https://rawcdn.githack.com/MaxWutw/Deep-Learning/a9bbc7ed859d16ebc782f3bbde5bd2e1c65073dc/Image%20recognition/type.txt", "classes.txt")

# Save photo1.jpg .. photo9.jpg into the working directory and remember
# their file names.
photo = []
for i in range(1, 10):
    urlretrieve(f"https://github.com/MaxWutw/Deep-Learning/raw/main/ResNet/photo{i}.jpg", f"photo{i}.jpg")
    photo.append(f"photo{i}.jpg")

# Load every photo resized to 224x224 and convert it to an array.
store = [img_to_array(load_img(name, target_size=(224, 224))) for name in photo]

resnet = ResNet50()

# Decode explicitly as UTF-8 so that non-ASCII label text reads the same on
# every platform instead of depending on the locale's default encoding.
# NOTE(review): assumes classes.txt is UTF-8 encoded - confirm.
with open('classes.txt', encoding='utf-8') as f:
    labels = [line.strip() for line in f]

for i, pixels in enumerate(store):
    plt.figure(i)
    plt.axis('off')
    plt.imshow(pixels / 255)
    # preprocess_input may overwrite a float array in place, so hand it a
    # copy of the single-image batch and leave the raw pixels in `store`.
    inp = preprocess_input(np.expand_dims(pixels, axis=0).copy())
    # The batch holds one image, so argmax yields exactly one class index.
    [k] = np.argmax(resnet.predict(inp), axis=-1)
    # Only the first space-separated token of the label goes on the figure;
    # the full label line is printed below.
    tex = labels[k].split(' ')
    plt.text(15, 220, f"ResNet judge: {tex[0]}", fontsize=15)
    plt.show()
    print(f"ResNet 覺得是 {labels[k]}")
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# Transfer learning on local files: put a 3-class softmax layer on top of a
# frozen ResNet50, fit it on nine photos (photo1.jpg .. photo9.jpg), then
# classify three further photos (test1.jpg .. test3.jpg).
# All image files are read from the current working directory.
tmp = [f"photo{i}.jpg" for i in range(1, 10)]

# Load each training photo resized to 256x256 and stack them into one
# uint8 array.
data = np.asarray(
    [img_to_array(load_img(path, target_size=(256, 256))) for path in tmp],
    dtype=np.uint8,
)

# Class numbers 1..3 for the nine photos, three photos per class.
target = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
x_train = preprocess_input(data)

# Preview one training photo. Plot the raw pixels in `data`: `x_train` has
# been through preprocess_input and is no longer plain image data.
plt.axis('off')
n = 1
plt.imshow(data[n])

# Shift the class numbers to 0..2 and one-hot encode them.
y_train = to_categorical(target - 1, 3)

# ResNet50 without its top classification layer, with average pooling on the
# output; its weights are frozen so only the new Dense layer is trained.
resnet = ResNet50(include_top=False, pooling="avg")
resnet.trainable = False

model = Sequential()
model.add(resnet)
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# batch_size=9 means each epoch is one batch containing all nine photos.
model.fit(x_train, y_train, batch_size=9, epochs=25)

# Compare the predictions on the training photos with the expected classes.
y_predict = np.argmax(model.predict(x_train), -1)
labels = ["麻雀", "喜鵲", "白頭翁"]
print(y_predict)
print(target - 1)

# Classify the three test photos, loaded the same way as the training set.
pho = [f"test{i}.jpg" for i in range(1, 4)]
testing = np.asarray(
    [img_to_array(load_img(path, target_size=(256, 256))) for path in pho],
    dtype=np.uint8,
)
testing_data = preprocess_input(testing)
final = np.argmax(model.predict(testing_data), -1)
for i, k in enumerate(final, start=1):
    print(f"{i}. CNN judge: ", labels[k])
print("Answer : 麻雀、白頭翁、喜鵲")
### 程式介紹
這裡是使用遷移學習,我們會運用ResNet做好的訓練,將ResNet加入我們的神經網路,但是不讓ResNet重新訓練,而是讓它照著之前的訓練經驗來判斷我們的圖片,所以我們不需要大批的數據就能達到效果,簡單的說就是運用ResNet舊有的經驗判斷它沒看過的圖。
這邊一樣會引入上一個ResNet需要用到的套件,而這邊會多加幾樣架設神經網路需要用到的套件,因為我們只要ResNet的經驗,實際上我們是將ResNet加入我們的神經網路,成為其中一個份子。
首先是讀入相片,這邊是讀入電腦中的圖片,所以記得將程式碼和照片放在同一個目錄,不然就要加上正確的路徑,再來將我們讀入的圖片轉成array。後面的target是我們的解答,這個地方可以依據數據的不同而進行更改,再來就是做照片的前置處理,至於這邊的y_train,其實就是我們的答案,只是轉成one-hot encoding。接著我們將ResNet50的網路取出,但這邊要記得加入第一個參數include_top=False,由於我們是要訓練我們自己的資料,所以就將分類那1000種種類的那一層刪去,而第二個參數pooling="avg"代表ResNet50的輸出會經過平均池化再做回傳,resnet.trainable = False是說我們不要讓ResNet50重新訓練,因為ResNet50非常龐大,而且我們也不需要。
再來就是建造我們的神經網路,第一層加入剛剛已經提取的resnet,第二層加入Dense層,因為我們要做fully connected,再來進行compile,這裡的loss使用categorical_crossentropy,optimizer使用adam,adam相對於SGD快上許多,也同時比較穩定,最後進行fit訓練。
再來我們將剛才訓練的資料當成第一筆測試資料,並將它們和實際答案比對,來看錯在哪,但是單純使用訓練資料當測試資料並不可靠,所以我最後再從我電腦中加入幾張網路上找的圖片,最後將結果輸出就是我們要的結果。
## 利用遷移學習打造影像辨識在Gradio上執行 ###
程式碼(此部分會利用作者github的圖片訓練) 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from urllib.request import urlretrieve
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import gradio as gr
# Transfer learning with downloaded images: fetch nine training photos and
# three test photos, fit a 3-class softmax layer on top of a frozen ResNet50,
# and print the predictions for both sets.
tmp = []
for i in range(1, 10):
    urlretrieve(f"https://github.com/MaxWutw/Deep-Learning/raw/main/Image%20recognition/photo{i}.jpg", f"photo{i}.jpg")
    tmp.append(f"photo{i}.jpg")

# Load each training photo resized to 256x256 and stack them into one
# uint8 array.
data = np.asarray(
    [img_to_array(load_img(path, target_size=(256, 256))) for path in tmp],
    dtype=np.uint8,
)

# Class numbers 1..3 for the nine photos, three photos per class.
target = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])
x_train = preprocess_input(data)

# Preview one training photo. Plot the raw pixels in `data`: `x_train` has
# been through preprocess_input and is no longer plain image data.
plt.axis('off')
n = 1
plt.imshow(data[n])

# Shift the class numbers to 0..2 and one-hot encode them.
y_train = to_categorical(target - 1, 3)

# ResNet50 without its top classification layer, with average pooling on the
# output; its weights are frozen so only the new Dense layer is trained.
resnet = ResNet50(include_top=False, pooling="avg")
resnet.trainable = False

model = Sequential()
model.add(resnet)
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# batch_size=9 means each epoch is one batch containing all nine photos.
model.fit(x_train, y_train, batch_size=9, epochs=25)

# Compare the predictions on the training photos with the expected classes.
y_predict = np.argmax(model.predict(x_train), -1)
labels = ["麻雀", "喜鵲", "白頭翁"]
print(y_predict)
print(target - 1)

# Download and classify the three test photos, loaded the same way as the
# training set.
pho = []
for i in range(1, 4):
    urlretrieve(f"https://github.com/MaxWutw/Deep-Learning/raw/main/Image%20recognition/test{i}.jpg", f"test{i}.jpg")
    pho.append(f"test{i}.jpg")
testing = np.asarray(
    [img_to_array(load_img(path, target_size=(256, 256))) for path in pho],
    dtype=np.uint8,
)
testing_data = preprocess_input(testing)
final = np.argmax(model.predict(testing_data), -1)
for i, k in enumerate(final, start=1):
    print(f"{i}. CNN judge: ", labels[k])
print("Answer : 麻雀、白頭翁、喜鵲")
def classify_image(inp):
    """Score one image against the three bird labels with the trained model.

    Args:
        inp: Image pixel array. It is reshaped to (-1, 256, 256, 3) before
            prediction; presumably a single 256x256 RGB image supplied by
            the Gradio image input - confirm against the caller.

    Returns:
        A dict mapping each of the first three entries of the module-level
        ``labels`` list to the corresponding model output as a float.
    """
    batch = inp.reshape((-1, 256, 256, 3))
    batch = preprocess_input(batch)
    scores = model.predict(batch).flatten()

    result = {}
    for idx in range(3):
        result[labels[idx]] = float(scores[idx])
    return result
# Web UI: a 256x256 image input wired to classify_image, with a label output
# that shows the top three classes.
# NOTE(review): gr.inputs / gr.outputs and the capture_session argument look
# like the legacy Gradio API - confirm the installed Gradio version still
# accepts them.
image = gr.inputs.Image(shape=(256, 256), label="鳥類照片")
label = gr.outputs.Label(num_top_classes=3, label="AI辨識結果")

demo = gr.Interface(
    fn=classify_image,
    inputs=image,
    outputs=label,
    title="AI 三種鳥類辨識機",
    description="我能辨識台灣常見的三種鳥類: 麻雀、喜鵲、白頭翁。",
    capture_session=True,
)
demo.launch()