PyTorch Installation and Setup
Comprehensive guide to installing PyTorch with GPU acceleration and essential deep learning workflows.
PyTorch Installation
Install PyTorch (CPU Version)
# Install PyTorch CPU-only (on Linux, a plain "pip install torch" pulls CUDA wheels)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Install specific version
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
# Verify installation
python -c "import torch; print(torch.__version__)"
python -c "import torch; print(torch.cuda.is_available())"
Install PyTorch with CUDA Support
# PyTorch with CUDA 12.1
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# PyTorch with CUDA 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Verify CUDA installation
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
python -c "import torch; print(f'CUDA version: {torch.version.cuda}')"
python -c "import torch; print(f'GPU count: {torch.cuda.device_count()}')"
python -c "import torch; print(f'GPU name: {torch.cuda.get_device_name(0)}')"
Install with Conda
# Install PyTorch with conda (CUDA 12.1)
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
# Install CPU version
conda install pytorch torchvision torchaudio cpuonly -c pytorch
Essential PyTorch Commands
Tensor Operations
# Import PyTorch
import torch
import numpy as np
# Create tensors
tensor_a = torch.tensor([1, 2, 3, 4])
tensor_b = torch.zeros(3, 3)
tensor_c = torch.ones(2, 2)
tensor_d = torch.rand(2, 3) # Random [0, 1)
tensor_e = torch.randn(2, 3) # Normal distribution
# Tensor from NumPy
np_array = np.array([1, 2, 3])
tensor_f = torch.from_numpy(np_array)
# Tensor to NumPy (shares memory; tensor must be on CPU)
numpy_array = tensor_a.numpy()
# Tensor info
print(tensor_b.shape)
print(tensor_b.dtype)
print(tensor_b.device)
# Tensor operations
result = tensor_a + 10
result = tensor_a * 2
result = torch.matmul(tensor_b, tensor_b)
result = tensor_a.sum()
result = tensor_a.mean()
result = tensor_a.max()
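Reshaping and indexing come up just as often as arithmetic; a minimal sketch:
# Reshape and index tensors
x = torch.arange(12)           # tensor([0, 1, ..., 11])
m = x.reshape(3, 4)            # 3x4 matrix
row = m[0]                     # first row
col = m[:, 1]                  # second column
flat = m.flatten()             # back to 1-D
stacked = torch.stack([x, x])  # new leading dim: shape (2, 12)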
GPU Operations
# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
# Move tensor to GPU
tensor_gpu = tensor_a.to(device)
tensor_gpu = tensor_a.cuda()
# Move back to CPU
tensor_cpu = tensor_gpu.to('cpu')
tensor_cpu = tensor_gpu.cpu()
# Create tensor directly on GPU
tensor_gpu = torch.rand(3, 3, device='cuda')
# Get GPU memory info
print(f'Allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB')
print(f'Cached: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB')
# Clear cache
torch.cuda.empty_cache()
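CUDA kernels launch asynchronously, so naive timing measures only the kernel launch, not the work itself; call torch.cuda.synchronize() before reading the clock. A rough benchmark sketch (the matrix size is arbitrary):
import time
a = torch.randn(4096, 4096)
b = torch.randn(4096, 4096)
t0 = time.time()
c = a @ b                      # CPU matmul
cpu_time = time.time() - t0
a_gpu, b_gpu = a.to(device), b.to(device)
torch.cuda.synchronize()       # wait for the host-to-device copies
t0 = time.time()
c_gpu = a_gpu @ b_gpu          # GPU matmul
torch.cuda.synchronize()       # wait for the kernel to finish
gpu_time = time.time() - t0
print(f'CPU: {cpu_time:.3f}s, GPU: {gpu_time:.3f}s')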
Building Neural Networks
Define Neural Network with nn.Module
import torch.nn as nn
import torch.nn.functional as F
# Simple feedforward network
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
# Create model instance
model = SimpleNet(input_size=784, hidden_size=128, num_classes=10)
# Move model to GPU
model = model.to(device)
# Print model architecture
print(model)
# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total parameters: {total_params}')
print(f'Trainable parameters: {trainable_params}')
Convolutional Neural Network (CNN)
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
model = CNN(num_classes=10).to(device)
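The 64 * 7 * 7 input size of fc1 assumes 28x28 inputs (two 2x2 poolings take 28 to 14 to 7), as in MNIST. A dummy forward pass is a cheap way to verify the shapes line up:
# Sanity-check the architecture with a dummy batch
dummy = torch.randn(1, 1, 28, 28, device=device)
print(model(dummy).shape)  # expected: torch.Size([1, 10])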
Sequential Model
# Using nn.Sequential
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(128, 10)
).to(device)
Training Loop
Complete Training Example
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Prepare data (x_train, y_train: pre-loaded feature and label tensors)
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Define model, loss, optimizer
model = SimpleNet(784, 128, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data to device
        data = data.to(device)
        targets = targets.to(device)
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    avg_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')
Model Evaluation
# Evaluation function
def evaluate(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data = data.to(device)
            targets = targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy
# Run evaluation (test_dataset is created in the data-loading section below)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
evaluate(model, test_loader, device)
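CrossEntropyLoss works on raw logits, so the model never outputs probabilities directly; apply softmax at inference time if you need confidence scores. A sketch, assuming data is an already-prepared input batch:
# Logits to probabilities (inference only)
model.eval()
with torch.no_grad():
    logits = model(data.to(device))
    probs = F.softmax(logits, dim=1)
    confidence, predicted = probs.max(dim=1)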
Data Loading and Augmentation
Using torchvision Datasets
from torchvision import datasets, transforms
# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Load MNIST dataset
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
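For larger datasets, two DataLoader options usually matter most for throughput; the values below are illustrative starting points:
# Parallel loading and pinned host memory for faster GPU transfers
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,    # subprocesses that load batches in parallel
    pin_memory=True,  # speeds up .to(device) copies to CUDA
)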
Data Augmentation
# Advanced augmentation (e.g. for 32x32 RGB images, CIFAR-style)
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Apply to a PIL image (ToTensor expects a PIL image or NumPy array, not a tensor)
from PIL import Image
img = Image.open('path/to/image.jpg')
transformed_img = train_transform(img)
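For images that don't ship with torchvision, a custom Dataset only has to implement __len__ and __getitem__; a minimal sketch, where the class name and the (path, label) sample layout are illustrative:
from torch.utils.data import Dataset

class ImageListDataset(Dataset):
    def __init__(self, samples, transform=None):
        self.samples = samples        # e.g. [('cat.jpg', 0), ('dog.jpg', 1)]
        self.transform = transform
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label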
Save and Load Models
Model Checkpointing
# Save entire model
torch.save(model, 'model_complete.pth')
# Load entire model (PyTorch 2.6+ defaults to weights_only=True, which rejects pickled models)
model = torch.load('model_complete.pth', weights_only=False)
# Save model state dict (recommended)
torch.save(model.state_dict(), 'model_weights.pth')
# Load model state dict
model = SimpleNet(784, 128, 10)
model.load_state_dict(torch.load('model_weights.pth'))
model.to(device)
# Save checkpoint with optimizer state
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')
# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
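If the checkpoint was saved on GPU but is being loaded on a CPU-only machine (or a different GPU), pass map_location:
# Remap saved tensors to the current device
checkpoint = torch.load('checkpoint.pth', map_location=device)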
Transfer Learning
Using Pre-trained Models
from torchvision import models
# Load pre-trained ResNet (pretrained=True is deprecated in favor of weights=)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False
# Replace final layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
# Fine-tune specific layers
for param in model.layer4.parameters():
    param.requires_grad = True
# Move to device
model = model.to(device)
# Available models
# resnet18, resnet34, resnet50, resnet101, resnet152
# vgg16, vgg19
# densenet121, densenet169, densenet201
# mobilenet_v2, mobilenet_v3_large
# efficientnet_b0 to efficientnet_b7
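With most layers frozen, it is common to hand the optimizer only the parameters that still require gradients; a short sketch (the learning rate is illustrative):
# Optimize only the unfrozen parameters
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable, lr=1e-4)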
Learning Rate Scheduling
Learning Rate Schedulers
from torch.optim import lr_scheduler
# Step LR
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# Multi-step LR
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)
# Exponential LR
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# Reduce on plateau
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
# Cosine annealing
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
# Use in training loop (train_one_epoch() and validate() are placeholders for your own routines)
for epoch in range(num_epochs):
    train_one_epoch()
    val_loss = validate()
    scheduler.step()  # or scheduler.step(val_loss) for ReduceLROnPlateau
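The effective learning rate lives on the optimizer, so it can be logged the same way for every scheduler type:
# Inspect the current learning rate
current_lr = optimizer.param_groups[0]['lr']
print(f'Current learning rate: {current_lr:.6f}')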
Mixed Precision Training
Automatic Mixed Precision (AMP)
from torch.amp import autocast, GradScaler  # torch.cuda.amp is deprecated since PyTorch 2.4
# Create gradient scaler
scaler = GradScaler('cuda')
# Training loop with AMP
for epoch in range(num_epochs):
    for data, targets in train_loader:
        data, targets = data.to(device), targets.to(device)
        optimizer.zero_grad()
        # Forward pass with autocast
        with autocast('cuda'):
            outputs = model(data)
            loss = criterion(outputs, targets)
        # Backward pass with scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
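On GPUs with native bfloat16 support (Ampere and newer), a common variant is bf16 autocast, which keeps the fp32 exponent range and therefore usually needs no GradScaler; a sketch under that assumption:
# bf16 autocast without gradient scaling
optimizer.zero_grad()
with autocast('cuda', dtype=torch.bfloat16):
    outputs = model(data)
    loss = criterion(outputs, targets)
loss.backward()
optimizer.step()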
PyTorch Troubleshooting
Common Issues
# CUDA out of memory
# Solution 1: Reduce batch size
train_loader = DataLoader(dataset, batch_size=16)
# Solution 2: Clear cache
torch.cuda.empty_cache()
# Solution 3: Use gradient accumulation
accumulation_steps = 4
optimizer.zero_grad()
for i, (data, targets) in enumerate(train_loader):
    outputs = model(data)
    loss = criterion(outputs, targets) / accumulation_steps
    loss.backward()
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
# Gradient explosion
# Use gradient clipping (call between loss.backward() and optimizer.step())
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
# Check for NaN/Inf
if torch.isnan(loss) or torch.isinf(loss):
    print("NaN or Inf detected in loss")
# Device mismatch error
# Ensure all tensors are on same device
data = data.to(device)
targets = targets.to(device)
model = model.to(device)
# Memory leak
# Always use torch.no_grad() for inference
with torch.no_grad():
    outputs = model(data)
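To find which operation produced a NaN during the backward pass, enable anomaly detection; it adds significant overhead, so use it only while debugging:
# Pinpoint the op that created a NaN gradient (debug only)
torch.autograd.set_detect_anomaly(True)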
Conclusion
This comprehensive guide covers PyTorch installation and essential deep learning workflows. VCCLHOSTING GPU servers provide optimized PyTorch environments for maximum performance.