Initial commit
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
LeNet-5 example
|
||||
"""
|
||||
import gzip
|
||||
import numpy as np
|
||||
from requests import get
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
def download_file(url, file_name):
    """
    Download the resource at ``url`` and store it locally as ``file_name``.

    The request is issued and its status checked before the destination file
    is opened, so a failed download neither truncates an existing file nor
    leaves an empty file or a saved HTTP error page behind.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the local file the response body is written to.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = get(url)
    # Fail loudly instead of silently writing an error page to disk, which
    # would only surface later as a confusing decompression error.
    response.raise_for_status()

    with open(file_name, "wb") as file:
        file.write(response.content)
|
||||
|
||||
def read_mnist(images_path: str, labels_path: str):
    """
    Load images and labels from a pair of gzip-compressed MNIST files.

    Both files are read as raw unsigned bytes. The first 8 bytes of the label
    file and the first 16 bytes of the image file are skipped as header data.

    Args:
        images_path: Path to the gzip-compressed image file.
        labels_path: Path to the gzip-compressed label file.

    Returns:
        A ``(features, labels)`` tuple. ``labels`` is a flat uint8 array and
        ``features`` is a uint8 array of shape ``(len(labels), 28, 28, 1)``.
    """
    with gzip.open(labels_path, 'rb') as label_stream:
        label_bytes = label_stream.read()
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(images_path, 'rb') as image_stream:
        image_bytes = image_stream.read()

    # The number of labels determines how many images are expected.
    sample_count = len(labels)
    pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)

    # Each image is stored flat (784 px); restore it to 28x28 with one channel.
    flat_images = pixels.reshape(sample_count, 784)
    features = flat_images.reshape(sample_count, 28, 28, 1)

    return features, labels
|
||||
|
||||
def display_image(dataset, position):
    """
    Show one sample of ``dataset`` together with its label.

    Args:
        dataset: Mapping with the keys ``'features'`` and ``'labels'``.
        position: Index of the sample to show.
    """
    # Drop singleton dimensions so imshow receives a plain 2-D image.
    pixels = dataset['features'][position].squeeze()
    label = dataset['labels'][position]

    heading = 'Example %d. Label: %d' % (position, label)
    plt.title(heading)

    # Reversed grayscale: high pixel values are drawn dark on a light background.
    colormap = plt.get_cmap('gray_r')
    plt.imshow(pixels, cmap=colormap)
    plt.show()
|
||||
|
||||
def main():
    """
    Entry point: download MNIST, load it and prepare it as LeNet-5 input.

    The four MNIST archives are downloaded into the working directory, read
    into memory, summarized on stdout, split into training and validation
    sets, and zero-padded from 28x28 to 32x32 pixels.
    """
    # Step 1:
    # Download the MNIST dataset, which consists of labeled handwritten
    # images (28x28 px).
    downloads = (
        # training set images (9912422 bytes)
        ('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
         'train-images-idx3-ubyte.gz'),
        # training set labels (28881 bytes)
        ('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
         'train-labels-idx1-ubyte.gz'),
        # test set images (1648877 bytes)
        ('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
         't10k-images-idx3-ubyte.gz'),
        # test set labels (4542 bytes)
        ('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
         't10k-labels-idx1-ubyte.gz'),
    )
    for source_url, target_name in downloads:
        download_file(source_url, target_name)

    # Step 2:
    # Read the MNIST dataset (training and testing).
    train_features, train_labels = read_mnist('train-images-idx3-ubyte.gz',
                                              'train-labels-idx1-ubyte.gz')
    train = {'features': train_features, 'labels': train_labels}

    test_features, test_labels = read_mnist('t10k-images-idx3-ubyte.gz',
                                            't10k-labels-idx1-ubyte.gz')
    test = {'features': test_features, 'labels': test_labels}

    # Step 3:
    # Explore the dataset.
    print('Number of training images:', train['features'].shape[0])
    print('Number of test images:', test['features'].shape[0])

    # Step 4:
    # Display some images (disabled by default; call e.g.
    # display_image(train, 0) to preview a sample).

    # Step 5:
    # Print how often each label occurs in the training data.
    distinct_labels, label_counts = np.unique(train['labels'], return_counts=True)
    dataframe_train_labels = pd.DataFrame({'Label': distinct_labels,
                                           'Count': label_counts})
    print(dataframe_train_labels)

    # Step 6:
    # Split the training data into training and validation sets (80/20,
    # fixed seed so the split is reproducible).
    kept_features, held_out_features, kept_labels, held_out_labels = train_test_split(
        train['features'], train['labels'], test_size=0.2, random_state=0)
    train['features'] = kept_features
    train['labels'] = kept_labels
    validation = {'features': held_out_features, 'labels': held_out_labels}

    print('Number of training images:', train['features'].shape[0])
    print('Number of validation images:', validation['features'].shape[0])

    # Step 7:
    # Prepare the input features.
    # The LeNet architecture accepts 32x32 pixel images as input, but MNIST
    # data is 28x28 pixels, so each image is padded with two zero pixels on
    # every side; the sample and channel axes are left untouched.
    pad_widths = ((0, 0), (2, 2), (2, 2), (0, 0))
    for dataset in (train, validation, test):
        dataset['features'] = np.pad(dataset['features'], pad_widths, 'constant')

    print("Updated Image Shape: {}".format(train['features'][0].shape))
|
||||
|
||||
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user