zheyuan xu:
Why is CPU utilization so high and GPU utilization so low when using Keras? As shown in this picture (enter image description here), within every 5-second interval the GPU utilization is about 80% for 1 second and 0% for the rest of the time. I use tensorflow.keras.utils.Sequence to load the data and model.fit_generator to train. I don't know how to deal with this problem.
train.py
def create_callbacks(opt, steps_per_epoch, model=None):
    """Build the list of Keras callbacks used for training.

    Ensures the log and result directories exist under ``opt.root_path``
    and then assembles the learning-rate scheduler, the learning-rate
    printer, a best-only weight checkpoint and early stopping.

    Args:
        opt: Parsed options. Reads ``root_path``, ``log_dir``,
            ``result_path``, ``lr``, ``lr_decay``, ``cycle_length``,
            ``multi_factor`` and ``warm_up_epoch``.
        steps_per_epoch: Number of batches per epoch; forwarded to the
            learning-rate scheduler.
        model: Optional model. When given, ``ParallelModelCheckpoint`` is
            used and receives this model; otherwise a plain
            ``ModelCheckpoint`` is used.

    Returns:
        ``[learning_rate_scheduler, print_lr, checkpoint, early_stopping]``.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # exists()+mkdir() and also creates missing parent directories.
    # The log directory is still created although no TensorBoard callback
    # is currently registered.
    log_dir = os.path.join(opt.root_path, opt.log_dir)
    os.makedirs(log_dir, exist_ok=True)

    result_path = os.path.join(opt.root_path, opt.result_path)
    os.makedirs(result_path, exist_ok=True)

    # Both checkpoint flavours share the same file pattern and settings.
    weights_pattern = os.path.join(result_path, 'ep{epoch:03d}-val_acc{val_acc:.2f}.h5')
    checkpoint_kwargs = dict(monitor='val_acc', save_weights_only=True,
                             save_best_only=True, period=1)
    if model is not None:
        # NOTE(review): presumably saves the weights of the template model
        # rather than the multi-GPU wrapper - confirm in ParallelModelCheckpoint.
        checkpoint = ParallelModelCheckpoint(model, weights_pattern, **checkpoint_kwargs)
    else:
        checkpoint = ModelCheckpoint(weights_pattern, **checkpoint_kwargs)

    early_stopping = EarlyStopping(monitor='val_acc', min_delta=0, patience=10)
    learning_rate_scheduler = SGDRScheduler_with_WarmUp(
        0, opt.lr, steps_per_epoch, lr_decay=opt.lr_decay,
        cycle_length=opt.cycle_length, multi_factor=opt.multi_factor,
        warm_up_epoch=opt.warm_up_epoch)
    print_lr = PrintLearningRate()
    return [learning_rate_scheduler, print_lr, checkpoint, early_stopping]
def train(opt):
    """Build the network, train it with the data generators and save weights.

    Args:
        opt: Parsed options. Reads ``network``, ``num_classes``,
            ``pretrained_weights``, ``gpus``, ``epochs``, ``workers``,
            ``root_path``, ``result_path`` and the dataset-related fields
            passed on to ``DataGenerator``.
    """
    K.clear_session()

    video_input = Input(shape=(None, None, None, 3))
    model = nets.network[opt.network](video_input, num_classes=opt.num_classes)
    print("Create {} model with {} classes".format(opt.network, opt.num_classes))

    if opt.pretrained_weights is not None:
        model.load_weights(opt.pretrained_weights)
        print("Loading weights from {}".format(opt.pretrained_weights))

    optimizer = get_optimizer(opt)

    train_data_generator = DataGenerator(
        opt.data_name, opt.video_path, opt.train_list, opt.name_path,
        'train', opt.batch_size, opt.num_classes, True, opt.short_side,
        opt.crop_size, opt.clip_len, opt.n_samples_for_each_video)
    val_data_generator = DataGenerator(
        opt.data_name, opt.video_path, opt.val_list, opt.name_path,
        'val', opt.batch_size, opt.num_classes, False, opt.short_side,
        opt.crop_size, opt.clip_len, opt.n_samples_for_each_video)

    # Always run at least one step, even if a generator reports zero batches.
    train_steps = max(1, len(train_data_generator))
    val_steps = max(1, len(val_data_generator))

    callbacks = create_callbacks(opt, train_steps, model)

    n_gpus = len(opt.gpus)
    if n_gpus > 1:
        print('Using multi gpus')
        training_model = multi_gpu_model(model, gpus=n_gpus)
    else:
        training_model = model

    training_model.compile(optimizer=optimizer, loss=categorical_crossentropy,
                           metrics=['accuracy'])
    training_model.fit_generator(
        train_data_generator, steps_per_epoch=train_steps,
        epochs=opt.epochs, validation_data=val_data_generator,
        validation_steps=val_steps, workers=opt.workers,
        callbacks=callbacks, use_multiprocessing=True)

    # NOTE(review): the original indentation was lost; the final save is
    # assumed to run after either branch, always on the base model - confirm.
    final_dir = os.path.join(opt.root_path, opt.result_path)
    model.save_weights(os.path.join(final_dir, 'trained_weights_final.h5'))
if __name__ == "__main__":
    # Script entry point: parse the options, restrict the visible GPUs to
    # the requested ids, then start training.
    opt = parse_opts()
    print(opt)
    visible_gpus = [str(gpu_id) for gpu_id in opt.gpus]
    os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(visible_gpus)
    train(opt)
Some of the parameters are set like this:
--num_classes=60 \
--workers=4 \
--batch_size=64 \
--crop_size=160 \
--clip_len=32 \
--short_side 192 224 \
--gpus 8 9
Here is some of the code of my Keras data loader:
import os
import random
import math
import copy
import time
import numpy as np
from tensorflow.keras.utils import Sequence
from .spatial_transforms import RandomCrop, Scale, RandomHorizontalFlip, CenterCrop, Compose, Normalize, PreCenterCrop
from .tempora_transforms import TemporalRandomCrop, TemporalCenterCrop
from .utils import load_value_file, load_clip_video
def get_ntu(video_path, file_path, name_path, mode, num_classes):
    """Read an NTU list file and return video paths with zero-based labels.

    The label of each entry is taken from the three characters following
    the first ``'A'`` in the line, converted to an int and shifted to be
    zero-based.

    Args:
        video_path: Root directory that every listed entry is joined onto.
        file_path: Text file with one video entry per line.
        name_path: Text file whose line count must equal ``num_classes``.
        mode: Unused here; kept so the signature matches ``get_ucf101``.
        num_classes: Expected number of classes.

    Returns:
        ``(video_files, label_files)``: two parallel lists of joined paths
        and integer labels.

    Raises:
        AssertionError: If ``name_path`` does not have ``num_classes`` lines.
    """
    # Context managers make sure both files are closed again.
    with open(name_path, 'r') as name_file:
        n_names = sum(1 for _ in name_file)
    assert num_classes == n_names

    video_files = []
    label_files = []
    with open(file_path, 'r') as list_file:
        for line in list_file:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            label = int(entry.split('A')[1][:3]) - 1
            label_files.append(label)
            video_files.append(os.path.join(video_path, entry))
    return video_files, label_files
def get_ucf101(video_path, file_path, name_path, mode, num_classes):
    """Read a UCF101 list file and return video paths with class indices.

    Args:
        video_path: Root directory that every listed entry is joined onto.
        file_path: Text file with one entry per line. In ``'train'`` mode a
            line holds two whitespace-separated fields and only the first
            (the path) is used; in ``'val'`` mode the line is the path.
        name_path: Class index file; the second whitespace-separated field
            of each line is the class name, mapped to its line position.
        mode: ``'train'`` or ``'val'``.
        num_classes: Expected number of distinct class names.

    Returns:
        ``(video_files, label_files)``: parallel lists of extension-less
        paths joined onto ``video_path`` and integer class indices.

    Raises:
        AssertionError: If the number of class names differs from
            ``num_classes``.
        ValueError: If ``mode`` is neither ``'train'`` nor ``'val'``.
    """
    name2index = {}
    with open(name_path, 'r') as name_file:
        for i, line in enumerate(name_file):
            class_name = line.split()[1]
            name2index[str(class_name)] = i
    assert num_classes == len(name2index)

    # Validate once up front instead of on every line of the list file.
    if mode not in ('train', 'val'):
        raise ValueError('mode must be train or val')

    video_files = []
    label_files = []
    with open(file_path, 'r') as list_file:
        for line in list_file:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if mode == 'train':
                path, _ = entry.split()
            else:
                # Stripped, so no newline leaks into the resulting path.
                path = entry
            pathname, _ = os.path.splitext(path)
            video_files.append(os.path.join(video_path, pathname))
            # The class name is the first component of the relative path.
            label = pathname.split('/')[0]
            label_files.append(name2index[label])
    return video_files, label_files
class DataGenerator(Sequence):
    """Keras ``Sequence`` producing batches of video clips and one-hot labels.

    At construction time the list file is read and expanded into a list of
    sample dicts (``video_path``, ``label``, ``frame_indices``). Frames are
    loaded and transformed each time a batch is requested.
    """

    def __init__(self, data_name, video_path, file_path,
                 name_path, mode, batch_size, num_classes,
                 shuffle, short_side=[256, 320], crop_size=224,
                 clip_len=64, n_samples_for_each_video=1):
        """Index the dataset and set up the transforms for ``mode``.

        Args:
            data_name: ``'ucf101'`` or ``'ntu'``; selects the list parser.
            video_path: Root directory of the extracted videos.
            file_path: List file naming the videos of this split.
            name_path: Class name file.
            mode: ``'train'`` (random crop/flip, random temporal crop) or
                ``'val'`` (scale, centred temporal crop).
            batch_size: Number of samples per batch.
            num_classes: Number of classes; size of the one-hot labels.
            shuffle: Whether to shuffle samples now and after every epoch.
            short_side: Accepted for interface compatibility; not used by
                this class.
            crop_size: Size handed to ``RandomCrop`` / ``Scale``.
            clip_len: Number of frames handed to the temporal transform.
            n_samples_for_each_video: ``1`` keeps one sample per video with
                all frame indices; other values split a video into clips.

        Raises:
            ValueError: If ``data_name`` or ``mode`` is not supported.
        """
        super().__init__()
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle

        if data_name == 'ucf101':
            self.video_files, self.label_files = get_ucf101(
                video_path, file_path, name_path, mode, num_classes)
        elif data_name == 'ntu':
            self.video_files, self.label_files = get_ntu(
                video_path, file_path, name_path, mode, num_classes)
        else:
            # Fail early instead of hitting a missing attribute later on.
            raise ValueError(
                "data_name must be 'ucf101' or 'ntu', got {!r}".format(data_name))

        if mode == 'train':
            self.spatial_transforms = Compose([
                PreCenterCrop(),
                RandomCrop(crop_size),
                RandomHorizontalFlip(),
                Normalize()
            ])
            self.temporal_transforms = TemporalRandomCrop(clip_len)
        elif mode == 'val':
            self.spatial_transforms = Compose([
                PreCenterCrop(),
                Scale(crop_size),
                Normalize()
            ])
            self.temporal_transforms = TemporalCenterCrop(clip_len)
        else:
            raise ValueError('mode must be train or val')

        self.dataset = self.makedataset(n_samples_for_each_video, clip_len)
        print('Dataset loading Successful!!!')
        if self.shuffle:
            random.shuffle(self.dataset)

    def __len__(self):
        """Return the number of batches per epoch.

        Batches are sliced from ``self.dataset``, whose length differs from
        ``self.video_files`` when videos are skipped or split into several
        clips, so the count must be based on the dataset.
        """
        return math.ceil(len(self.dataset) / self.batch_size)

    def __getitem__(self, index):
        """Load and return batch ``index`` as ``(video_data, label_data)``."""
        start = index * self.batch_size
        batch_dataset = self.dataset[start:start + self.batch_size]
        video_data, label_data = self.data_generator(batch_dataset)
        return video_data, label_data

    def on_epoch_end(self):
        """Reshuffle the samples after each epoch when shuffling is enabled."""
        if self.shuffle:
            random.shuffle(self.dataset)

    def makedataset(self, n_samples_for_each_video, clip_len):
        """Expand ``self.video_files`` into a list of sample dicts.

        Videos whose directory is missing or whose ``n_frames`` value is
        not positive are skipped.

        Args:
            n_samples_for_each_video: ``1`` yields one sample per video with
                frame indices ``1..n_frames``. Values above ``1`` space the
                clip starts evenly over the video; any other value uses
                back-to-back clips of ``clip_len`` frames.
            clip_len: Maximum number of frame indices per clip.

        Returns:
            List of dicts with keys ``'video_path'``, ``'label'`` and
            ``'frame_indices'``.
        """
        dataset = []
        n_videos = len(self.video_files)
        for i, video_file in enumerate(self.video_files):
            if i % 1000 == 0:
                print('dataset loading [{}/{}]'.format(i, n_videos))
            if not os.path.exists(video_file):
                print('{} is not exist'.format(video_file))
                continue

            n_frame_path = os.path.join(video_file, 'n_frames')
            n_frames = int(load_value_file(n_frame_path))
            if n_frames <= 0:
                continue

            sample = {
                'video_path': video_file,
                'label': int(self.label_files[i]),
            }
            if n_samples_for_each_video == 1:
                sample['frame_indices'] = list(range(1, n_frames + 1))
                dataset.append(sample)
                continue

            if n_samples_for_each_video > 1:
                step = max(1, math.ceil(
                    (n_frames - 1 - clip_len) / (n_samples_for_each_video - 1)))
            else:
                step = clip_len
            for j in range(1, n_frames, step):
                # The sample holds only a str and an int, so a shallow copy
                # gives every clip its own independent dict.
                clip_indices = list(range(j, min(n_frames + 1, j + clip_len)))
                dataset.append(dict(sample, frame_indices=clip_indices))
        return dataset

    def data_generator(self, batch_dataset):
        """Load, transform and stack the clips of one batch.

        Args:
            batch_dataset: List of sample dicts as built by ``makedataset``.

        Returns:
            ``(video_data, label_data)``: the stacked clips as one array and
            the labels as rows of a ``num_classes`` identity matrix.
        """
        video_data = []
        label_data = []
        for data in batch_dataset:
            path = data['video_path']
            frame_indices = data['frame_indices']
            if self.temporal_transforms is not None:
                frame_indices = self.temporal_transforms(frame_indices)
            # NOTE(review): presumably returns one image per frame index -
            # confirm against load_clip_video.
            clip = load_clip_video(path, frame_indices)
            if self.spatial_transforms is not None:
                # Draw the random parameters once per clip so that every
                # frame of the clip goes through the same transform.
                self.spatial_transforms.randomize_parameters()
                clip = [self.spatial_transforms(img) for img in clip]
            clip = np.stack(clip, 0)
            video_data.append(clip)
            label_data.append(data['label'])
        video_data = np.array(video_data)
        # Indexing the identity matrix by label yields one-hot rows.
        label_data = np.eye(self.num_classes)[label_data]
        return video_data, label_data
Posted in
S.E.F
via
StackOverflow & StackExchange Atomic Web Robots