pytorch - Flower Classification

Let's try classifying flower images with pytorch.

The dataset used here is this one: 102 categories of flowers in total, each with multiple images. The download comes already split into train, valid, and test sets, and we'll use those splits to classify the flowers.

1. Loading the Data

import torch
from torchvision import datasets, transforms

# Dataset directories; the paths follow the Colab layout used later in this post.
data_dir = '/content/flowers'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'

# Rotate the images slightly at random so training sees varied orientations.
# Likewise crop at random positions,
# flip horizontally at random,
# convert the data to tensors,
# and normalize the images.
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])

# No random augmentation for validation and test:
# only resize and center-crop.
valid_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

# Read the images and apply the transforms defined above.
image_datasets = dict()
image_datasets['train'] = datasets.ImageFolder(train_dir, transform=train_transforms)
image_datasets['valid'] = datasets.ImageFolder(valid_dir, transform=valid_transforms)
image_datasets['test'] = datasets.ImageFolder(test_dir, transform=test_transforms)

# Wrap the datasets in batched loaders.
dataloaders = dict()
dataloaders['train'] = torch.utils.data.DataLoader(image_datasets['train'], batch_size=64, shuffle=True)
dataloaders['valid'] = torch.utils.data.DataLoader(image_datasets['valid'], batch_size=32)
dataloaders['test']  = torch.utils.data.DataLoader(image_datasets['test'], batch_size=20)
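
As a quick sanity check (a minimal sketch, not part of the original pipeline), we can pull one batch and confirm the tensor shapes:

# One training batch: 64 images of 3x224x224, plus 64 labels.
images, labels = next(iter(dataloaders['train']))
print(images.shape)  # torch.Size([64, 3, 224, 224])
print(labels.shape)  # torch.Size([64])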

The class folders are named with numbers; the mapping below tells us which flower each number corresponds to.

{
  "21": "fire lily",
  "3": "canterbury bells",
  "45": "bolero deep blue",
  "1": "pink primrose",
  "34": "mexican aster",
  "27": "prince of wales feathers",
  "7": "moon orchid",
  "16": "globe-flower",
  "25": "grape hyacinth",
  "26": "corn poppy",
  "79": "toad lily",
  "39": "siam tulip",
  "24": "red ginger",
  "67": "spring crocus",
  "35": "alpine sea holly",
  "32": "garden phlox",
  "10": "globe thistle",
  "6": "tiger lily",
  "93": "ball moss",
  "33": "love in the mist",
  "9": "monkshood",
  "102": "blackberry lily",
  "14": "spear thistle",
  "19": "balloon flower",
  "100": "blanket flower",
  "13": "king protea",
  "49": "oxeye daisy",
  "15": "yellow iris",
  "61": "cautleya spicata",
  "31": "carnation",
  "64": "silverbush",
  "68": "bearded iris",
  "63": "black-eyed susan",
  "69": "windflower",
  "62": "japanese anemone",
  "20": "giant white arum lily",
  "38": "great masterwort",
  "4": "sweet pea",
  "86": "tree mallow",
  "101": "trumpet creeper",
  "42": "daffodil",
  "22": "pincushion flower",
  "2": "hard-leaved pocket orchid",
  "54": "sunflower",
  "66": "osteospermum",
  "70": "tree poppy",
  "85": "desert-rose",
  "99": "bromelia",
  "87": "magnolia",
  "5": "english marigold",
  "92": "bee balm",
  "28": "stemless gentian",
  "97": "mallow",
  "57": "gaura",
  "40": "lenten rose",
  "47": "marigold",
  "59": "orange dahlia",
  "48": "buttercup",
  "55": "pelargonium",
  "36": "ruby-lipped cattleya",
  "91": "hippeastrum",
  "29": "artichoke",
  "71": "gazania",
  "90": "canna lily",
  "18": "peruvian lily",
  "98": "mexican petunia",
  "8": "bird of paradise",
  "30": "sweet william",
  "17": "purple coneflower",
  "52": "wild pansy",
  "84": "columbine",
  "12": "colt's foot",
  "11": "snapdragon",
  "96": "camellia",
  "23": "fritillary",
  "50": "common dandelion",
  "44": "poinsettia",
  "53": "primula",
  "72": "azalea",
  "65": "californian poppy",
  "80": "anthurium",
  "76": "morning glory",
  "37": "cape flower",
  "56": "bishop of llandaff",
  "60": "pink-yellow dahlia",
  "82": "clematis",
  "58": "geranium",
  "75": "thorn apple",
  "41": "barbeton daisy",
  "95": "bougainvillea",
  "43": "sword lily",
  "83": "hibiscus",
  "78": "lotus lotus",
  "88": "cyclamen",
  "94": "foxglove",
  "81": "frangipani",
  "74": "rose",
  "89": "watercress",
  "73": "water lily",
  "46": "wallflower",
  "77": "passion flower",
  "51": "petunia"
}
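
The mapping above lives in a JSON file shipped with the dataset; a minimal sketch for loading it (the cat_to_name.json filename is an assumption, adjust it to wherever your copy lives):

import json

# Map category numbers (the folder names) to human-readable flower names.
with open('cat_to_name.json') as f:
    cat_to_name = json.load(f)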

For this analysis, we'll use a model that has already been built and trained: VGG16.

2. VGG16

[Figure: VGG-16 architecture]

  • Input: fixed-size 224x224 RGB images
  • Structure:
    • Convolutional layers (3x3 filter, stride=1, padding=1)
    • Max-pooling layers (2x2 filter, stride=2)
    • 1x1 conv layers (1x1 filter, stride=1)
    • Fully connected layers (4096 > 4096 > 1000)
  • Characteristics:
    • 3x3 filters in all convolutional layers
    • Uses 1x1 conv layers
    • Five max-pooling layers
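
To see this structure directly, we can print the pretrained torchvision model (a quick sketch):

from torchvision import models

vgg16 = models.vgg16(pretrained=True)
print(vgg16.features)    # the convolution + max-pooling stack
print(vgg16.classifier)  # the fully connected head: 4096 -> 4096 -> 1000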

3. Building the Classifier

from collections import OrderedDict

from torch import nn, optim
from torchvision import models

output_size = len(cat_to_name)
vgg16_model = models.vgg16(pretrained=True)

# 25088 (= 512 * 7 * 7), the size of the flattened feature map
input_size = vgg16_model.classifier[0].in_features

# Freeze the pretrained weights; only the new classifier will be trained.
for param in vgg16_model.parameters():
    param.requires_grad = False

classifier = nn.Sequential(
    OrderedDict([
        ('fc1', nn.Linear(input_size, input_size // 8)),
        ('relu1', nn.ReLU()),
        ('dropout1', nn.Dropout(p=0.20)),

        ('fc2', nn.Linear(input_size // 8, input_size // 32)),
        ('relu2', nn.ReLU()),
        ('dropout2', nn.Dropout(p=0.20)),

        ('fc3', nn.Linear(input_size // 32, input_size // 128)),
        ('relu3', nn.ReLU()),
        ('dropout3', nn.Dropout(p=0.20)),

        ('fc4', nn.Linear(input_size // 128, output_size)),
        ('softmax', nn.LogSoftmax(dim=1))
    ])
)

vgg16_model.classifier = classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20
learning_rate = 0.001

criterion = nn.NLLLoss()
optimizer = optim.Adam(vgg16_model.classifier.parameters(), lr=learning_rate)
vgg16_model = vgg16_model.to(device)

For the loss function I used NLLLoss, with Adam as the optimizer.
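
Since the classifier ends in LogSoftmax, NLLLoss on its output is mathematically identical to CrossEntropyLoss on raw logits; a minimal sketch verifying the equivalence:

# NLLLoss(LogSoftmax(x)) == CrossEntropyLoss(x) for the same inputs.
logits = torch.randn(4, 102)
labels = torch.randint(0, 102, (4,))
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), labels)
ce = nn.CrossEntropyLoss()(logits, labels)
print(torch.allclose(nll, ce))  # True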

# Track losses and accuracy across validation checks.
train_losses, valid_losses, accuracies = [], [], []

# Train for `epochs` passes over the data.
for e in range(epochs):
    running_loss = 0

    # Training set
    for index, (t_inputs, t_labels) in enumerate(dataloaders['train']):

        # Move tensors to cpu or cuda
        t_inputs, t_labels = t_inputs.to(device), t_labels.to(device)

        optimizer.zero_grad()

        t_outputs = vgg16_model(t_inputs)
        loss = criterion(t_outputs, t_labels)
        loss.backward()
        optimizer.step()

        # Accumulate the training loss
        running_loss += loss.item()

        # Every 5 batches, check against the validation set
        if (index + 1) % 5 == 0:
            # Switch to eval mode
            vgg16_model.eval()
            v_loss = 0
            accuracy = 0

            # Validation set
            for v_inputs, v_labels in dataloaders['valid']:

                v_inputs, v_labels = v_inputs.to(device), v_labels.to(device)

                # Autograd off
                with torch.no_grad():
                    v_output = vgg16_model(v_inputs)
                    v_loss += criterion(v_output, v_labels).item()
                    ps = torch.exp(v_output)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == v_labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

            # Back to training mode
            vgg16_model.train()

            train_losses.append(running_loss / 5)
            valid_losses.append(v_loss / len(dataloaders['valid']))
            accuracies.append(accuracy / len(dataloaders['valid']))

            ## Print the losses and accuracy
            print_accuracy = "{:.5f}".format(accuracies[-1])

            if len(accuracies) >= 2:
                change = accuracies[-1] - accuracies[-2]
                if change > 0:
                    print_accuracy += '↑'
                elif change == 0:
                    print_accuracy += '-'
                else:
                    print_accuracy += '↓'

            running_loss = 0

            print("Epoch: {}/{}".format(e+1, epochs),
                  "Training Loss: {:.5f}".format(train_losses[-1]),
                  "Validation Loss: {:.5f}".format(valid_losses[-1]),
                  "Accuracy: {}".format(print_accuracy))
Epoch: 1/20 Training Loss: 4.69953 Validation Loss: 0.21000 Accuracy: 0.02043
Epoch: 1/20 Training Loss: 4.53161 Validation Loss: 0.19509 Accuracy: 0.06490↑
Epoch: 1/20 Training Loss: 4.50895 Validation Loss: 0.19441 Accuracy: 0.09135↑
Epoch: 1/20 Training Loss: 4.22797 Validation Loss: 0.20272 Accuracy: 0.13582↑
Epoch: 1/20 Training Loss: 4.13869 Validation Loss: 0.21389 Accuracy: 0.17909↑
Epoch: 1/20 Training Loss: 3.75459 Validation Loss: 0.20136 Accuracy: 0.19952↑
Epoch: 1/20 Training Loss: 3.63869 Validation Loss: 0.19415 Accuracy: 0.24760...
Epoch: 20/20 Training Loss: 0.36903 Validation Loss: 0.00035 Accuracy: 0.89663↓
Epoch: 20/20 Training Loss: 0.53348 Validation Loss: 0.00012 Accuracy: 0.91106↑
Epoch: 20/20 Training Loss: 0.50826 Validation Loss: 0.00007 Accuracy: 0.90986↓
Epoch: 20/20 Training Loss: 0.36002 Validation Loss: 0.00102 Accuracy: 0.90385↓
Epoch: 20/20 Training Loss: 0.58472 Validation Loss: 0.00031 Accuracy: 0.90745↑
Epoch: 20/20 Training Loss: 0.50919 Validation Loss: 0.00016 Accuracy: 0.91346↑
Epoch: 20/20 Training Loss: 0.49237 Validation Loss: 0.00009 Accuracy: 0.91707

Unfortunately, from around the 8th epoch onward the accuracy never broke the 90% barrier and stayed at roughly the same level.

I ran this in a Colab notebook, and it took long enough (about 4 hours) to make me wonder whether it really needed to run that long.
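
Given the training time, it's worth saving a checkpoint so the run never has to be repeated; a minimal sketch (the filename and checkpoint keys are my assumptions, not from the original code):

# Persist the fine-tuned classifier and the class mapping.
checkpoint = {
    'classifier': vgg16_model.classifier,
    'state_dict': vgg16_model.state_dict(),
    'class_to_idx': image_datasets['train'].class_to_idx,
}
torch.save(checkpoint, 'vgg16_flowers.pth')  # hypothetical filename

# Later: rebuild the model and reload the weights.
model = models.vgg16(pretrained=True)
checkpoint = torch.load('vgg16_flowers.pth')
model.classifier = checkpoint['classifier']
model.load_state_dict(checkpoint['state_dict'])
model.class_to_idx = checkpoint['class_to_idx']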

vgg16_model.eval()

vgg16_model.to(device)
accuracy = 0
test_error = 0

for images, labels in dataloaders['test']:
  images, labels = images.to(device), labels.to(device)

  with torch.no_grad():
    output = vgg16_model(images)

  test_error += criterion(output, labels).item()
  ps = torch.exp(output)
  # Count a hit when the top predicted class matches the label
  # (torch.autograd.Variable is deprecated and no longer needed).
  equal = (labels == ps.max(1)[1]).float()
  accuracy += torch.mean(equal).item()


print("Test Error: {:.5f}".format(test_error/len(dataloaders['test'])))
print("Test Accuracy: {:.5f}".format(accuracy/len(dataloaders['test'])))
Test Error: 0.38797
Test Accuracy: 0.90456

The test accuracy came out to 90%. That's a decent result considering only the basics were used.

# Transform an image the same way as the transforms above.
def process_image(image):
    ''' Scales, crops, and normalizes a PIL image for a PyTorch model,
        returns an Numpy array
    '''

    image = Image.open(image)

    transformer = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],
                                                           [0.229, 0.224, 0.225])])

    return transformer(image)


img_path = '/content/flowers/test/1/image_06743.jpg'
img = process_image(img_path)
img.shape
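
The result is a tensor of shape torch.Size([3, 224, 224]): three color channels after the 224-pixel center crop.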

# Prediction
def predict(image_path, model, topk=5):
    ''' Predict the class (or classes) of an image using a trained deep learning model.
    '''

    # Switch to eval mode on the cpu
    model.eval()
    model.cpu()

    image = process_image(image_path)
    image = image.unsqueeze(0)

    with torch.no_grad():
      output = model(image)
      top_prob, top_labels = torch.topk(output, topk)
      top_prob = top_prob.exp()
      top_prob_array = top_prob.data.numpy()[0]

      top_prob_array = list(map(lambda x: round(float(x), 5), top_prob_array))

      # model.class_to_idx is assumed to have been set beforehand, e.g. from
      # image_datasets['train'].class_to_idx; invert it to map indices back to classes.
      idx_to_class = {v: k for k, v in model.class_to_idx.items()}

      top_labels_data = top_labels.data.numpy()
      top_labels_list = top_labels_data[0].tolist()

      top_classes = [idx_to_class[x] for x in top_labels_list]

    return top_prob_array, top_classes
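
A quick usage example (a sketch reusing the test image loaded above):

probs, classes = predict(img_path, vgg16_model)
print(probs)    # top-5 probabilities, largest first
print(classes)  # the matching category numbers as strings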

# Display an image
import numpy as np
import matplotlib.pyplot as plt

def imshow(image, ax=None, title=None):
    """Imshow for Tensor."""
    if ax is None:
        fig, ax = plt.subplots()

    if title:
        ax.set_title(title)

    # PyTorch tensors assume the color channel is the first dimension
    # but matplotlib assumes it is the third dimension
    image = image.numpy().transpose((1, 2, 0))

    # Undo preprocessing
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean

    image = np.clip(image, 0, 1)

    ax.imshow(image)

    return ax

def check(image_path, image_label=None):

  probs, classes = predict(image_path, vgg16_model)

  class_names = [cat_to_name[x] for x in classes]
  print('is this {}?'.format(class_names[0]))

  if image_label:
    print('this is {}.'.format(cat_to_name.get(classes[0])))

  fig, ax = plt.subplots()
  image_to_show = process_image(image_path)
  imshow(image_to_show, ax, class_names[0])

  fig, ax = plt.subplots()

  y_pos = np.arange(len(class_names))

  plt.barh(y_pos, probs)
  plt.yticks(y_pos, class_names)
  plt.xlabel('Probability')
  plt.show()
is this fritillary?
this is fritillary.
[Figure: the test image with its top-5 probability bar chart]