1. What is "Learning" in Image Processing? (Simple Definition)

Traditional Programming: You tell computer rules → Computer gives output

Machine Learning: You show examples → Computer learns rules by itself

Real-life example:

  • Traditional: You write code: "If the animal is brown and has 4 legs, it's a dog"
  • Machine Learning: You show 1000 dog photos and 1000 cat photos → Computer learns the difference automatically

2. Types of Learning in Image Processing

3. Method 1: Image Classification (Easiest)

Problem: Is this a dog or a cat?

Python Code (Using Pre-trained Model)

# First install: pip install tensorflow keras opencv-python numpy

import cv2
import numpy as np
from keras.models import load_model
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions

# Load pre-trained model (already learned from 1 million images)
model = ResNet50(weights='imagenet')

# Load and prepare your image
image_path = 'dog.jpg'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV loads pixels in BGR order, but ResNet50's preprocess_input expects
# RGB input, so swap the channels before preprocessing.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))
img = np.expand_dims(img, axis=0)  # add the batch dimension: (1, 224, 224, 3)
img = preprocess_input(img)

# Predict
predictions = model.predict(img)
results = decode_predictions(predictions, top=3)[0]

# Show results (each entry is (class id, label, score); the id is not needed)
for i, (_, label, score) in enumerate(results):
    print(f"{i+1}. {label}: {score*100:.2f}%")

MATLAB Code

% Classify one image with the pre-trained AlexNet network
pretrainedNet = alexnet;

% Read the picture and scale it to the 227x227 size fed to the network
inputImage = imresize(imread('dog.jpg'), [227, 227]);

% Run the network and display the predicted class name
predictedLabel = classify(pretrainedNet, inputImage);
disp(['Prediction: ' char(predictedLabel)]);

4. Method 2: Train Your Own Classifier (Simple)

Problem: Classify 3 types of fruits – Apple, Banana, Orange

Python Code (Complete Training Pipeline)

import cv2
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Load images and extract simple features (color histograms)
def extract_features(image_path):
    """Return a colour-histogram feature vector for one image.

    The image is resized to 100x100, converted to HSV, and described by a
    32-bin hue histogram followed by a 32-bin saturation histogram.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D NumPy array of 64 values (32 hue bins, then 32 saturation bins).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising; fail here with a clear
        # message rather than with an obscure error inside cv2.resize.
        raise FileNotFoundError(
            f"Could not read image (missing or not an image): {image_path}")
    img = cv2.resize(img, (100, 100))

    # Convert to HSV (better for colors)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Get color histogram as feature (OpenCV hue range is 0-179)
    hist_hue = cv2.calcHist([hsv], [0], None, [32], [0, 180])
    hist_sat = cv2.calcHist([hsv], [1], None, [32], [0, 256])

    # Flatten and combine
    return np.concatenate([hist_hue.flatten(), hist_sat.flatten()])

# Step 2: Load training data
X = []  # Features
y = []  # Labels

# Assuming folder structure: data/apple/, data/banana/, data/orange/
folders = ['apple', 'banana', 'orange']
label_map = {'apple': 0, 'banana': 1, 'orange': 2}
# Inverse mapping (label id -> fruit name) used when reporting a prediction
name_by_label = {label: name for name, label in label_map.items()}

for folder in folders:
    path = os.path.join('data', folder)
    # Sort the listing: os.listdir order is arbitrary, which would make the
    # seeded train/test split below differ between machines.
    for file in sorted(os.listdir(path)):
        # Case-insensitive match so .JPG / .jpeg files are not silently skipped
        if file.lower().endswith(('.jpg', '.jpeg')):
            X.append(extract_features(os.path.join(path, file)))
            y.append(label_map[folder])

# Step 3: Split into train and test
X = np.array(X)
y = np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train classifier
# gamma='scale' adapts the RBF width to the variance of the features; with
# gamma='auto' the raw histogram counts are so large that the kernel is ~0
# for every pair of different images and the SVM cannot generalise.
classifier = SVC(kernel='rbf', gamma='scale')
classifier.fit(X_train, y_train)

# Step 5: Test
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Step 6: Predict new image
new_img_features = extract_features('test_fruit.jpg')
prediction = classifier.predict([new_img_features])
fruit_name = name_by_label[prediction[0]]
print(f"This is: {fruit_name}")

MATLAB Code

% Simple fruit classifier using color features (hue histograms)

% Class names double as the sub-folder names under data/
classNames = {'apple', 'banana', 'orange'};
numBins = 32;

% Extract features
features = [];   % one row per image, numBins columns
labels = {};     % cell array: char rows of different length ('apple' vs
                 % 'banana') cannot be stacked with [labels; 'name']

for c = 1:numel(classNames)
    % Load training images of this class
    imds = imageDatastore(fullfile('data', classNames{c}), 'IncludeSubfolders', false);

    for i = 1:numel(imds.Files)
        img = imread(imds.Files{i});
        hsv = rgb2hsv(img);
        hist_hue = imhist(hsv(:,:,1), numBins);
        % Normalise so the feature does not depend on the image size
        hist_hue = hist_hue / sum(hist_hue);
        features = [features; hist_hue'];
        labels = [labels; classNames(c)];
    end
end

% Train classifier (multiclass error-correcting output codes model)
classifier = fitcecoc(features, labels);

% Test: build the same normalised hue histogram for the new image
test_img = imread('test_fruit.jpg');
test_hsv = rgb2hsv(test_img);
test_hist = imhist(test_hsv(:,:,1), numBins);
test_hist = test_hist / sum(test_hist);
prediction = predict(classifier, test_hist');
disp(['Predicted: ' char(prediction)]);

5. Method 3: Deep Learning for Beginners (CNN)

What is a CNN? A Convolutional Neural Network (CNN) is a neural network that automatically learns edges, shapes, and objects from images.

Architecture (Simple):

  • Input Layer (e.g. a 224x224 image; the CIFAR-10 code below uses 32x32 images)
  • Convolution Layer (finds edges)
  • Pooling Layer (reduces size)
  • Convolution Layer (finds shapes)
  • Pooling Layer
  • Dense Layer (decides)
  • Output (cat or dog)

Python Code (Simple CNN from Scratch)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

# Step 1: Load dataset (CIFAR-10 - 10 classes of objects)
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Normalize pixel values (0 to 1)
# Cast to float32 first: dividing the integer pixel arrays directly would
# promote them to float64 and double the memory used by the dataset.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Class names (index = integer label)
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Step 2: Build the CNN model
model = keras.Sequential([
    # Explicit input: 32x32 colour images
    keras.Input(shape=(32, 32, 3)),

    # First convolution block
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    # Second convolution block
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),

    # Third convolution block
    layers.Conv2D(64, (3,3), activation='relu'),

    # Dense layers for classification
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Step 3: Compile the model
# sparse_categorical_crossentropy because the labels are integer class ids
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Step 4: Train the model
# Hold out 10% of the *training* data for validation so the test set stays
# unseen until the final evaluation in Step 5.
history = model.fit(x_train, y_train,
                    epochs=10,
                    validation_split=0.1)

# Step 5: Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")

# Step 6: Predict on new image
def predict_image(image_path):
    """Classify one image file with the trained CNN and print the result.

    Uses the module-level ``model`` and ``class_names``.

    Args:
        image_path: Path of the image to classify; it is resized to 32x32.

    Returns:
        The predicted class name (an entry of ``class_names``).
    """
    # Imported locally: this snippet's import block does not bring in NumPy,
    # so the function would otherwise fail with a NameError on `np`.
    import numpy as np

    img = keras.preprocessing.image.load_img(image_path, target_size=(32,32))
    # Same 0-1 scaling that was applied to the training data
    img_array = keras.preprocessing.image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  # batch of one image

    predictions = model.predict(img_array)
    predicted_class = class_names[np.argmax(predictions)]
    print(f"This is: {predicted_class}")
    return predicted_class

# Test it
predict_image('my_cat.jpg')

MATLAB Code (Simple CNN)

% Download (if not already present) and load the CIFAR-10 dataset.
% helperCIFAR10Data is the helper class shipped with the MathWorks CIFAR-10
% examples; it must be on the MATLAB path, and load() needs the data folder.
cifar10Data = tempdir;
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-matlab.tar.gz';
helperCIFAR10Data.download(url, cifar10Data);
[XTrain, YTrain, XTest, YTest] = helperCIFAR10Data.load(cifar10Data);

% Define CNN layers
layers = [
    imageInputLayer([32 32 3])

    % Block 1: 32 filters of size 3x3, then halve the spatial size
    convolution2dLayer(3, 32, 'Padding', 'same')
    batchNormalizationLayer()
    reluLayer()
    maxPooling2dLayer(2, 'Stride', 2)

    % Block 2: 64 filters of size 3x3, then halve the spatial size again
    convolution2dLayer(3, 64, 'Padding', 'same')
    batchNormalizationLayer()
    reluLayer()
    maxPooling2dLayer(2, 'Stride', 2)

    % Classifier head: one output per class
    fullyConnectedLayer(10)
    softmaxLayer()
    classificationLayer()
];

% Training options
options = trainingOptions('adam', ...
    'MaxEpochs', 10, ...
    'Shuffle', 'every-epoch', ...
    'Plots', 'training-progress');

% Train
net = trainNetwork(XTrain, YTrain, layers, options);

% Test: fraction of test images whose predicted label matches the true label
predictions = classify(net, XTest);
accuracy = mean(predictions == YTest);
disp(['Accuracy: ' num2str(accuracy*100) '%']);

6. Method 4: Transfer Learning (Use Pre-trained Models)

Why reinvent the wheel? Use models already trained on millions of images.

Python Code (Using MobileNetV2)

from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
import numpy as np

# Build MobileNetV2 with its ImageNet weights
model = MobileNetV2(weights='imagenet')

# Read the picture at 224x224, turn it into a batch of one and preprocess it
img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = preprocess_input(image.img_to_array(img)[np.newaxis, ...])

# Run inference and keep the three most likely classes
preds = model.predict(x)
results = decode_predictions(preds, top=3)[0]

print("Top 3 predictions:")
for i, entry in enumerate(results):
    # Each entry is (imagenet id, label, score); only the last two are shown
    label, score = entry[1], entry[2]
    print(f"  {i+1}. {label}: {score*100:.2f}%")

Output example:

Top 3 predictions:
  1. African_elephant: 92.45%
  2. tusker: 5.23%
  3. Indian_elephant: 2.15%

7. Real-World Example: Face Mask Detector

Problem: Detect if a person is wearing a mask or not.

Complete Python Code

import cv2
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import pickle

# Step 1: Dataset preparation (you need images of 'with_mask' and 'without_mask')
def load_dataset(data_path):
    """Load the mask / no-mask images as flattened pixel vectors.

    Args:
        data_path: Directory containing the ``with_mask`` and
            ``without_mask`` sub-folders.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. Each row of ``X`` is one image
        resized to 100x100 and flattened; ``y`` holds 1 for mask and 0 for
        no mask.
    """
    X = []
    y = []

    # (sub-folder, label) pairs: 1 = mask, 0 = no mask
    for folder, label in (('with_mask', 1), ('without_mask', 0)):
        folder_path = os.path.join(data_path, folder)
        for file in os.listdir(folder_path):
            img = cv2.imread(os.path.join(folder_path, file))
            if img is None:
                # cv2.imread returns None for anything it cannot decode
                # (hidden files, thumbnails DBs, ...); skip instead of
                # crashing inside cv2.resize.
                continue
            img = cv2.resize(img, (100, 100))
            X.append(img.flatten())  # Convert image to 1D array
            y.append(label)

    return np.array(X), np.array(y)

# Load data
X, y = load_dataset('face_mask_dataset')

# Train model (fixed seed so the split, and therefore the score, is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
classifier = SVC(kernel='linear')
classifier.fit(X_train, y_train)

# Evaluate on the held-out 20% - otherwise the split above serves no purpose
# and the quality of the saved model is unknown.
print(f"Test accuracy: {classifier.score(X_test, y_test) * 100:.2f}%")

# Save model
with open('mask_detector.pkl', 'wb') as f:
    pickle.dump(classifier, f)

# Real-time detection using webcam
def detect_mask_live():
    """Run mask detection on the default webcam until 'q' is pressed.

    Loads the pickled classifier from 'mask_detector.pkl', finds faces in
    every frame with a Haar cascade, classifies each face crop, and draws a
    coloured box with a "MASK ON" / "NO MASK" label.

    Raises:
        RuntimeError: If the webcam (device 0) cannot be opened.
    """
    # Load model
    # NOTE: pickle.load can execute arbitrary code; only load files you
    # created yourself.
    with open('mask_detector.pkl', 'rb') as f:
        model = pickle.load(f)

    # Build the face detector once; creating it inside the loop would reload
    # the cascade XML file for every single frame.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Start webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device 0)")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unplugged / stream ended)
                break

            # Detect face (using Haar cascade)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)

            for (x, y, w, h) in faces:
                # Extract face region and shape it like a training sample
                face = frame[y:y+h, x:x+w]
                face = cv2.resize(face, (100, 100))
                face_flat = face.flatten().reshape(1, -1)

                # Predict
                prediction = model.predict(face_flat)[0]

                # Draw result
                if prediction == 1:
                    color = (0, 255, 0)  # Green
                    text = "MASK ON"
                else:
                    color = (0, 0, 255)  # Red
                    text = "NO MASK"

                cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                # Clamp the label position so it stays visible when the face
                # touches the top edge of the frame
                cv2.putText(frame, text, (x, max(y - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            cv2.imshow('Mask Detector', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error
        cap.release()
        cv2.destroyAllWindows()

# Run live detection
detect_mask_live()

8. Quick Comparison: Traditional vs Machine Learning vs Deep Learning

9. When to Use What?

10. Practice Exercise for Students

Task: Build a "Handwritten Digit Recognizer"

Dataset: MNIST (70,000 images of digits 0-9)

Python Solution:

import tensorflow as tf
import matplotlib.pyplot as plt

# Fetch the MNIST train / test split
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale pixel values into the 0-1 range
x_train, x_test = x_train / 255.0, x_test / 255.0

# Assemble the network one layer at a time
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))   # image -> vector
model.add(tf.keras.layers.Dense(128, activation='relu'))   # hidden layer
model.add(tf.keras.layers.Dropout(0.2))                    # regularisation
model.add(tf.keras.layers.Dense(10, activation='softmax')) # one output per digit

# Configure training
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training images
model.fit(x_train, y_train, epochs=5)

# Measure accuracy on the unseen test images
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")

# To try your own digit drawing,
# save it as a 28x28 image and run:
# prediction = model.predict(your_digit_image)

Expected output: >97% accuracy

11. Common Problems & Solutions

12. Summary Table

Advice for Students

  1. Start with pre-trained models (don't train from scratch)
  2. Use Google Colab if you don't have a good GPU
  3. Small dataset? Use Transfer Learning
  4. Always normalize your images (divide by 255, or use the model's own preprocess_input function for pre-trained networks)
  5. Save your trained model so you don't retrain every time
# Save model
# Writes the trained model to the file 'my_model.h5' so it can be reused
# without retraining.
# NOTE(review): recent Keras releases recommend the native '.keras' format
# over HDF5 ('.h5') - confirm against the installed version.
model.save('my_model.h5')

# Load later
# load_model rebuilds the model from the file written above.
from tensorflow.keras.models import load_model
model = load_model('my_model.h5')