几天前,我也遇到了这个问题。在网上做了大量调研之后,虽然没能直接找到合适的解决方案,但我想到了一个办法。跟随符号链接(follow symlinks)的做法在这里行不通,因为两个目录中的图像属性完全不同,而且还需要对它们分别应用不同的图像增强。
为此,我们需要创建两个独立的图像生成器并分别进行配置,然后再把它们合并起来。这个思路是我在网上找到的(原文“here”处为参考链接),它解决了我的问题。
不过还需要稍作调整。由于两个目录中的图像数量不同,简单地把两个批量大小相同的数据生成器合并在一起,会导致两个目录的图像在每个批次中所占的比例失衡:相对而言,图像较少的目录会有更多的图像被送入模型。因此,这两个数据生成器的批量大小必须根据各自目录中的图像数量来计算。这正是我这个解决方案的独到之处。
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import Sequence
import matplotlib.pyplot as plt
import numpy as np
import os
class MergedGenerators(Sequence):
    """Sequence that merges the batches of several sub-generators.

    For a given batch index, one batch is fetched from every sub-generator
    and their samples are concatenated, in generator order, into a single
    batch.

    Args:
        batch_size: Size of the merged batch; used to compute ``len(self)``.
        generators: Indexable sub-generators. Each must support ``len()``,
            integer indexing and expose a ``class_mode`` attribute.
        sub_batch_size: Batch size of each sub-generator, in the same order
            as ``generators``.
    """

    def __init__(self, batch_size, generators=None, sub_batch_size=None):
        # NOTE(review): newer Keras releases appear to expect the base
        # initializer to be called; it is a harmless no-op otherwise.
        super().__init__()
        # ``None`` defaults avoid sharing one mutable list between instances.
        self.generators = [] if generators is None else generators
        self.sub_batch_size = [] if sub_batch_size is None else sub_batch_size
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of merged batches.

        The sample count is estimated as the sum, over the sub-generators,
        of (number of batches * sub batch size), then divided by the merged
        batch size and truncated to an int.
        """
        total_samples = sum(
            len(generator) * size
            for generator, size in zip(self.generators, self.sub_batch_size)
        )
        return int(total_samples / self.batch_size)

    def __getitem__(self, index):
        """Getting items from the generators and packing them.

        Returns:
            ``np.ndarray`` of inputs when the first sub-generator has
            ``class_mode is None``; otherwise a tuple ``(inputs, targets)``
            of arrays.
        """
        X_batch = []
        Y_batch = []
        for generator in self.generators:
            # Wrap the index so shorter sub-generators are cycled through.
            batch = generator[index % len(generator)]
            if generator.class_mode is None:
                # Unlabelled generator: the batch holds inputs only.
                X_batch.extend(batch)
            else:
                inputs, targets = batch
                X_batch.extend(inputs)
                Y_batch.extend(targets)
        # The first sub-generator decides whether targets are returned.
        if self.generators[0].class_mode is None:
            return np.array(X_batch)
        return np.array(X_batch), np.array(Y_batch)
def _count_files(directory):
    """Return the number of files found anywhere under ``directory``."""
    return sum(len(files) for _root, _dirs, files in os.walk(directory))


def build_datagenerator(dir1=None, dir2=None, batch_size=32):
    """Build a merged generator over two image directories.

    Each directory gets its own ``ImageDataGenerator`` with its own
    augmentation settings. The merged batch of ``batch_size`` samples is
    split between the two in proportion to the number of files in each
    directory, so that both directories are sampled at the same rate.

    Args:
        dir1: Directory read with the heavier augmentation (shear, zoom,
            rotation, horizontal flip).
        dir2: Directory read with rescaling only.
        batch_size: Total number of samples per merged batch.

    Returns:
        A ``MergedGenerators`` instance wrapping both directory iterators.

    Raises:
        ValueError: If either directory contains no files.
    """
    n_images_in_dir1 = _count_files(dir1)
    n_images_in_dir2 = _count_files(dir2)
    if n_images_in_dir1 == 0 or n_images_in_dir2 == 0:
        raise ValueError(
            "Both directories must contain images: "
            f"found {n_images_in_dir1} file(s) in {dir1!r} and "
            f"{n_images_in_dir2} file(s) in {dir2!r}")
    total_images = n_images_in_dir1 + n_images_in_dir2

    # Have to set different batch size for two generators as number of images
    # in those two directories are not same. As we have to equalize the image
    # share in the generators
    generator1_batch_size = (n_images_in_dir1 * batch_size) // total_images
    if batch_size >= 2:
        # Keep both shares at least 1 so neither iterator is created with a
        # zero batch size when one directory is much smaller than the other.
        generator1_batch_size = min(max(generator1_batch_size, 1),
                                    batch_size - 1)
    generator2_batch_size = batch_size - generator1_batch_size

    generator1 = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=5.,
        horizontal_flip=True,
    )
    # generator2 has different image augmentation attributes than generator1
    generator2 = ImageDataGenerator(
        rescale=1. / 255,
        horizontal_flip=False,
    )

    generator1 = generator1.flow_from_directory(
        dir1,
        target_size=(128, 128),
        color_mode='rgb',
        class_mode=None,
        batch_size=generator1_batch_size,
        shuffle=True,
        seed=42,
        interpolation="bicubic",
    )
    # Image size, colour mode and class mode must match generator1: the
    # merged batch is stacked into one array, and MergedGenerators decides
    # whether to return targets from the first generator only.
    generator2 = generator2.flow_from_directory(
        dir2,
        target_size=(128, 128),
        color_mode='rgb',
        class_mode=None,
        batch_size=generator2_batch_size,
    )

    return MergedGenerators(
        batch_size,
        generators=[generator1, generator2],
        sub_batch_size=[generator1_batch_size, generator2_batch_size])
def test_datagen(batch_size=32):
    """Build the merged generator and plot the images of its first batch.

    Prints the number of merged batches and the shape of the first batch,
    then draws every image of that batch side by side in a single row.

    Args:
        batch_size: Merged batch size passed to ``build_datagenerator``.
    """
    datagen = build_datagenerator(
        dir1="./asdf", dir2="./asdf2", batch_size=batch_size)
    print("Datagenerator length (Batch count):", len(datagen))
    for batch_count, image_batch in enumerate(datagen):
        # Only the first batch is inspected.
        if batch_count == 1:
            break
        print("Images: ", image_batch.shape)
        plt.figure(figsize=(10, 10))
        n_images = image_batch.shape[0]
        for i in range(n_images):
            # subplot needs (nrows, ncols, index): one row, one column
            # per image in the batch.
            plt.subplot(1, n_images, i + 1)
            plt.imshow(image_batch[i], interpolation='nearest')
            plt.axis('off')
        plt.tight_layout()
# Run the demo with a merged batch size of 4.
test_datagen(4)
要计算各子生成器的批量大小,可使用以下公式:
$$b = \frac{B \times n}{\sum n}$$
Where,b = Batch Size Of Any Sub-generator
B = Desired Batch Size Of The Merged Generator
n = Number Of Images In That Directory Of Sub-generator
the sum of n = Total Number Of Images In All Directories
本文链接:https://www.f2er.com/2056327.html