Kaggle Histopathologic Cancer Detection Competition: A TensorFlow Dataset + Keras Model Implementation


Keras and TensorFlow integrate tightly: you can build the model with Keras, feed it data through TensorFlow's Dataset API, and save the training results as TensorFlow checkpoints. This post records a tf.data.Dataset -> Keras Model -> Checkpoint implementation for reference. For alternative implementations using PyTorch, a Keras Generator, or TensorFlow 2.0, see my other posts.
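Before the full listing, here is a minimal, self-contained sketch of that flow. It uses randomly generated placeholder data instead of the competition images; all names and shapes in it are illustrative only, and the real implementation follows below.

```python
# Minimal sketch of the tf.data -> Keras -> Checkpoint flow, on fake data.
import os
import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

images = np.random.rand(100, 32, 32, 3).astype(np.float32)            # placeholder images
labels = tf.keras.utils.to_categorical(np.random.randint(0, 2, 100))  # one-hot, 2 classes

# tf.data pipeline: slice -> shuffle -> repeat -> batch
ds = tf.data.Dataset.from_tensor_slices((images, labels)).shuffle(100).repeat().batch(10)

# A tiny Keras model, just enough to exercise fit()
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(32, 32, 3)),
    tf.keras.layers.Dense(2, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

# The checkpoint callback writes TensorFlow-format weight files each epoch
os.makedirs('cp_demo', exist_ok=True)
cp = tf.keras.callbacks.ModelCheckpoint('cp_demo/cp.ckpt', save_weights_only=True)
model.fit(ds, epochs=2, steps_per_epoch=10, callbacks=[cp])
```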

```python
import numpy as np
import tensorflow as tf
import os, sys, csv
import cv2 as cv
import matplotlib.pyplot as plt

tf.enable_eager_execution()
print(tf.VERSION)  # confirm the TF 1.x version in use

AUTOTUNE = tf.data.experimental.AUTOTUNE
IMAGE_COUNT_TRAIN = 0
IMAGE_COUNT_TEST = 0
BATCH_SIZE = 36

def prepare_dataset_train_test():
    def prepare_filenames_labels_all():
        CSV_PATH = 'D:/ai_data/histopathologic-cancer-detection/train_labels.csv'
        TRAIN_IMAGE_FOLDER = 'D:/ai_data/histopathologic-cancer-detection/train'
        def read_csv():
            with open(CSV_PATH) as f:
                reader = csv.reader(f)
                return list(reader)
        csv_read = read_csv()
        filenames = []
        labels = []
        for item in csv_read[1:]:  # skip the CSV header row
            filenames.append(TRAIN_IMAGE_FOLDER + '/' + item[0] + '.tif')
            labels.append(int(item[1]))
        # return filenames[:1000], labels[:1000]
        return filenames, labels

    def prepare_filenames_labels_train_validate(filenames_all, labels_all, validate_ratio):
        global IMAGE_COUNT_TRAIN
        global IMAGE_COUNT_TEST
        file_quant = len(filenames_all)
        file_quant_train = int(float(file_quant) * (1.0 - validate_ratio))
        # file_quant_test = file_quant - file_quant_train
        train_filenames = filenames_all[0:file_quant_train]
        train_labels = labels_all[0:file_quant_train]
        test_filenames = filenames_all[file_quant_train:]
        test_labels = labels_all[file_quant_train:]
        IMAGE_COUNT_TRAIN = len(train_filenames)
        IMAGE_COUNT_TEST = len(test_filenames)
        return train_filenames, train_labels, test_filenames, test_labels

    filenames_all, labels_all = prepare_filenames_labels_all()
    train_filenames, train_labels, test_filenames, test_labels = \
        prepare_filenames_labels_train_validate(filenames_all, labels_all, 0.2)

    def image_read_cv2(filename, label):
        # OpenCV decodes the .tif; tf.py_function passes filename in as an EagerTensor
        image_decoded = cv.imread(filename.numpy().decode(), 1)
        return image_decoded, label

    def image_resize(image_decoded, label):
        image_decoded.set_shape([None, None, None])
        image_resized = tf.image.resize_images(image_decoded, [299, 299])
        # scale pixels to [-1, 1] (InceptionV3 convention) and one-hot encode the label
        return (image_resized / 255.0 - 0.5) * 2, tf.one_hot(label, 2, on_value=1.0, off_value=0.0, axis=-1)

    def prepare_train_ds(filenames, labels):
        global BATCH_SIZE
        paths_ds = tf.data.Dataset.from_tensor_slices(filenames)
        labels_ds = tf.data.Dataset.from_tensor_slices(labels)
        paths_labels_ds = tf.data.Dataset.zip((paths_ds, labels_ds))
        images_labels_ds = paths_labels_ds.shuffle(buffer_size=300000)
        images_labels_ds = images_labels_ds.map(
            lambda filename, label: tf.py_function(
                func=image_read_cv2,
                inp=[filename, label],
                Tout=[tf.uint8, tf.int32]),
            num_parallel_calls=AUTOTUNE)
        images_labels_ds = images_labels_ds.map(image_resize, num_parallel_calls=AUTOTUNE)
        images_labels_ds = images_labels_ds.repeat()
        images_labels_ds = images_labels_ds.batch(BATCH_SIZE)
        images_labels_ds = images_labels_ds.prefetch(buffer_size=200)
        # plt.figure(figsize=(8, 8))
        # for n, (image, label) in enumerate(images_labels_ds.take(10)):
        #     plt.subplot(2, 5, n + 1)
        #     plt.imshow(image)
        #     plt.grid(False)
        #     plt.xticks([])
        #     plt.yticks([])
        #     plt.xlabel('xxxxxxxxx label')
        # plt.show()
        return images_labels_ds

    def prepare_test_ds(filenames, labels):
        global BATCH_SIZE
        paths_ds = tf.data.Dataset.from_tensor_slices(filenames)
        labels_ds = tf.data.Dataset.from_tensor_slices(labels)
        images_labels_ds = tf.data.Dataset.zip((paths_ds, labels_ds))
        # images_labels_ds = images_labels_ds.shuffle(buffer_size=300000)  # no shuffle for validation
        images_labels_ds = images_labels_ds.map(
            lambda filename, label: tf.py_function(
                func=image_read_cv2,
                inp=[filename, label],
                Tout=[tf.uint8, tf.int32]),
            num_parallel_calls=AUTOTUNE)
        images_labels_ds = images_labels_ds.map(image_resize, num_parallel_calls=AUTOTUNE)
        images_labels_ds = images_labels_ds.repeat()
        images_labels_ds = images_labels_ds.batch(BATCH_SIZE)
        images_labels_ds = images_labels_ds.prefetch(buffer_size=200)
        return images_labels_ds

    train_image_label_ds = prepare_train_ds(train_filenames, train_labels)
    test_image_label_ds = prepare_test_ds(test_filenames, test_labels)
    return train_image_label_ds, test_image_label_ds

train_image_label_ds, test_image_label_ds = prepare_dataset_train_test()
keras_ds = train_image_label_ds
keras_validate_ds = test_image_label_ds

# ImageNet-pretrained InceptionV3 backbone with a new 2-class softmax head
InceptionV3 = tf.keras.applications.InceptionV3(include_top=False,
                                                weights='imagenet',
                                                input_tensor=None,
                                                input_shape=(299, 299, 3))
inception_v3 = InceptionV3.get_layer(index=-1).output
output = tf.keras.layers.AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(inception_v3)
output = tf.keras.layers.Flatten(name='flatten')(output)
output = tf.keras.layers.Dense(2, activation='softmax', name='predictions')(output)
model = tf.keras.models.Model(InceptionV3.input, output)

model.trainable = True
# for layer in model.layers[:-3]:
#     layer.trainable = False
model.summary()
# for x in model.non_trainable_weights:
#     print(x.name)

optimizer = tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9, decay=0.0, nesterov=True)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Keras expects plain Python ints here, not tensors
steps_per_epoch = int(np.ceil(IMAGE_COUNT_TRAIN / BATCH_SIZE))
validation_steps = int(np.ceil(IMAGE_COUNT_TEST / BATCH_SIZE))

checkpoint_path = 'checkpoint4/cp.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 monitor='val_acc',
                                                 mode='max',
                                                 verbose=1)
tb_callback = tf.keras.callbacks.TensorBoard(log_dir='./Graph4',
                                             update_freq='batch',
                                             histogram_freq=0,
                                             write_graph=True,
                                             write_images=True)

# resume from a previous run if a checkpoint exists (skipped on the first run)
if tf.train.latest_checkpoint(checkpoint_dir):
    model.load_weights(checkpoint_path)

model.fit(keras_ds,
          epochs=8,
          steps_per_epoch=steps_per_epoch,
          # validation_split=0.2,
          validation_data=keras_validate_ds,
          validation_steps=validation_steps,
          callbacks=[cp_callback, tb_callback])

print()
print()
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>evaluate>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
model.load_weights(checkpoint_path)
loss, acc = model.evaluate(keras_validate_ds, steps=validation_steps)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))
```
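The saved checkpoint can also be restored later for inference outside the training run. A short sketch, assuming the model-building code above has been re-run so that `model`, `keras_validate_ds`, and `validation_steps` exist:

```python
# Restore the best weights saved by the ModelCheckpoint callback above and
# run inference; `model`, `keras_validate_ds`, and `validation_steps` are
# assumed to come from the script above.
model.load_weights('checkpoint4/cp.ckpt')
probs = model.predict(keras_validate_ds, steps=validation_steps)  # softmax scores, shape (N, 2)
pred_labels = probs.argmax(axis=-1)  # 0 = no tumor tissue, 1 = tumor tissue
print(pred_labels[:10])
```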