PyTorch Installation and Setup
Comprehensive guide to installing PyTorch with GPU acceleration and essential deep learning workflows.
PyTorch Installation
Install PyTorch (CPU Version)
# Install PyTorch CPU-only (on Linux, a plain "pip install torch" pulls CUDA wheels)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Install specific version
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0
# Verify installation
python -c "import torch; print(torch.__version__)"
python -c "import torch; print(torch.cuda.is_available())"
Install PyTorch with CUDA Support
# PyTorch with CUDA 12.1
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# PyTorch with CUDA 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Verify CUDA installation
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
python -c "import torch; print(f'CUDA version: {torch.version.cuda}')"
python -c "import torch; print(f'GPU count: {torch.cuda.device_count()}')"
python -c "import torch; print(f'GPU name: {torch.cuda.get_device_name(0)}')"
Install with Conda
# Install PyTorch with conda (CUDA 12.1)
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
# Install CPU version
conda install pytorch torchvision torchaudio cpuonly -c pytorch
Essential PyTorch Commands
Tensor Operations
# Import PyTorch
import torch
import numpy as np
# Create tensors
tensor_a = torch.tensor([1, 2, 3, 4])
tensor_b = torch.zeros(3, 3)
tensor_c = torch.ones(2, 2)
tensor_d = torch.rand(2, 3) # Random [0, 1)
tensor_e = torch.randn(2, 3) # Normal distribution
# Tensor from NumPy
np_array = np.array([1, 2, 3])
tensor_f = torch.from_numpy(np_array)
# Tensor to NumPy (shares memory; tensor must be on CPU)
numpy_array = tensor_a.numpy()
# Tensor info
print(tensor_b.shape)
print(tensor_b.dtype)
print(tensor_b.device)
# Tensor operations
result = tensor_a + 10
result = tensor_a * 2
result = torch.matmul(tensor_b, tensor_b)
result = tensor_a.sum()
result = tensor_a.mean()
result = tensor_a.max()
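Reshaping and indexing come up just as often as arithmetic; a minimal sketch:
# Reshape and index tensors
x = torch.arange(12)           # tensor([0, 1, ..., 11])
m = x.reshape(3, 4)            # 3x4 matrix
row = m[0]                     # first row
col = m[:, 1]                  # second column
flat = m.flatten()             # back to 1-D
stacked = torch.stack([x, x])  # new leading dim: shape (2, 12)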
GPU Operations
# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
# Move tensor to GPU
tensor_gpu = tensor_a.to(device)
tensor_gpu = tensor_a.cuda()
# Move back to CPU
tensor_cpu = tensor_gpu.to('cpu')
tensor_cpu = tensor_gpu.cpu()
# Create tensor directly on GPU
tensor_gpu = torch.rand(3, 3, device='cuda')
# Get GPU memory info
print(f'Allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB')
print(f'Cached: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB')
# Clear cache
torch.cuda.empty_cache()
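CUDA kernels launch asynchronously, so naive timing measures only the kernel launch, not the work itself; call torch.cuda.synchronize() before reading the clock. A rough benchmark sketch (the matrix size is arbitrary):
import time
a = torch.randn(4096, 4096)
b = torch.randn(4096, 4096)
t0 = time.time()
c = a @ b                      # CPU matmul
cpu_time = time.time() - t0
a_gpu, b_gpu = a.to(device), b.to(device)
torch.cuda.synchronize()       # wait for the host-to-device copies
t0 = time.time()
c_gpu = a_gpu @ b_gpu          # GPU matmul
torch.cuda.synchronize()       # wait for the kernel to finish
gpu_time = time.time() - t0
print(f'CPU: {cpu_time:.3f}s, GPU: {gpu_time:.3f}s')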
Building Neural Networks
Define Neural Network with nn.Module
import torch.nn as nn
import torch.nn.functional as F
# Simple feedforward network
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
# Create model instance
model = SimpleNet(input_size=784, hidden_size=128, num_classes=10)
# Move model to GPU
model = model.to(device)
# Print model architecture
print(model)
# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total parameters: {total_params}')
print(f'Trainable parameters: {trainable_params}')
Convolutional Neural Network (CNN)
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
model = CNN(num_classes=10).to(device)
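The 64 * 7 * 7 input size of fc1 assumes 28x28 inputs (two 2x2 poolings take 28 to 14 to 7), as in MNIST. A dummy forward pass is a cheap way to verify the shapes line up:
# Sanity-check the architecture with a dummy batch
dummy = torch.randn(1, 1, 28, 28, device=device)
print(model(dummy).shape)  # expected: torch.Size([1, 10])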
Sequential Model
# Using nn.Sequential
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(128, 10)
).to(device)
Training Loop
Complete Training Example
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Prepare data (x_train, y_train: pre-loaded feature and label tensors)
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Define model, loss, optimizer
model = SimpleNet(784, 128, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move data to device
        data = data.to(device)
        targets = targets.to(device)
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    avg_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}')
Model Evaluation
# Evaluation function
def evaluate(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data = data.to(device)
            targets = targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy
# Run evaluation (test_dataset is created in the data-loading section below)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
evaluate(model, test_loader, device)
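CrossEntropyLoss works on raw logits, so the model never outputs probabilities directly; apply softmax at inference time if you need confidence scores. A sketch, assuming data is an already-prepared input batch:
# Logits to probabilities (inference only)
model.eval()
with torch.no_grad():
    logits = model(data.to(device))
    probs = F.softmax(logits, dim=1)
    confidence, predicted = probs.max(dim=1)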
Data Loading and Augmentation
Using torchvision Datasets
from torchvision import datasets, transforms
# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Load MNIST dataset
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
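For larger datasets, two DataLoader options usually matter most for throughput; the values below are illustrative starting points:
# Parallel loading and pinned host memory for faster GPU transfers
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,    # subprocesses that load batches in parallel
    pin_memory=True,  # speeds up .to(device) copies to CUDA
)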
Data Augmentation
# Advanced augmentation (e.g. for 32x32 RGB images, CIFAR-style)
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Apply to a PIL image (ToTensor expects a PIL image or NumPy array, not a tensor)
from PIL import Image
img = Image.open('path/to/image.jpg')
transformed_img = train_transform(img)
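For images that don't ship with torchvision, a custom Dataset only has to implement __len__ and __getitem__; a minimal sketch, where the class name and the (path, label) sample layout are illustrative:
from torch.utils.data import Dataset

class ImageListDataset(Dataset):
    def __init__(self, samples, transform=None):
        self.samples = samples        # e.g. [('cat.jpg', 0), ('dog.jpg', 1)]
        self.transform = transform
    def __len__(self):
        return len(self.samples)
    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label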
Save and Load Models
Model Checkpointing
# Save entire model
torch.save(model, 'model_complete.pth')
# Load entire model (PyTorch 2.6+ defaults to weights_only=True, which rejects pickled models)
model = torch.load('model_complete.pth', weights_only=False)
# Save model state dict (recommended)
torch.save(model.state_dict(), 'model_weights.pth')
# Load model state dict
model = SimpleNet(784, 128, 10)
model.load_state_dict(torch.load('model_weights.pth'))
model.to(device)
# Save checkpoint with optimizer state
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')
# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
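If the checkpoint was saved on GPU but is being loaded on a CPU-only machine (or a different GPU), pass map_location:
# Remap saved tensors to the current device
checkpoint = torch.load('checkpoint.pth', map_location=device)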
Transfer Learning
Using Pre-trained Models
from torchvision import models
# Load pre-trained ResNet (pretrained=True is deprecated in favor of weights=)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False
# Replace final layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
# Fine-tune specific layers
for param in model.layer4.parameters():
    param.requires_grad = True
# Move to device
model = model.to(device)
# Available models
# resnet18, resnet34, resnet50, resnet101, resnet152
# vgg16, vgg19
# densenet121, densenet169, densenet201
# mobilenet_v2, mobilenet_v3_large
# efficientnet_b0 to efficientnet_b7
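With most layers frozen, it is common to hand the optimizer only the parameters that still require gradients; a short sketch (the learning rate is illustrative):
# Optimize only the unfrozen parameters
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable, lr=1e-4)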
Learning Rate Scheduling
Learning Rate Schedulers
from torch.optim import lr_scheduler
# Step LR
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# Multi-step LR
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)
# Exponential LR
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# Reduce on plateau
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
# Cosine annealing
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
# Use in training loop (train_one_epoch() and validate() are placeholders for your own routines)
for epoch in range(num_epochs):
    train_one_epoch()
    val_loss = validate()
    scheduler.step()  # or scheduler.step(val_loss) for ReduceLROnPlateau
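The effective learning rate lives on the optimizer, so it can be logged the same way for every scheduler type:
# Inspect the current learning rate
current_lr = optimizer.param_groups[0]['lr']
print(f'Current learning rate: {current_lr:.6f}')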
Mixed Precision Training
Automatic Mixed Precision (AMP)
from torch.amp import autocast, GradScaler  # torch.cuda.amp is deprecated since PyTorch 2.4
# Create gradient scaler
scaler = GradScaler('cuda')
# Training loop with AMP
for epoch in range(num_epochs):
    for data, targets in train_loader:
        data, targets = data.to(device), targets.to(device)
        optimizer.zero_grad()
        # Forward pass with autocast
        with autocast('cuda'):
            outputs = model(data)
            loss = criterion(outputs, targets)
        # Backward pass with scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
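On GPUs with native bfloat16 support (Ampere and newer), a common variant is bf16 autocast, which keeps the fp32 exponent range and therefore usually needs no GradScaler; a sketch under that assumption:
# bf16 autocast without gradient scaling
optimizer.zero_grad()
with autocast('cuda', dtype=torch.bfloat16):
    outputs = model(data)
    loss = criterion(outputs, targets)
loss.backward()
optimizer.step()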
PyTorch Troubleshooting
Common Issues
# CUDA out of memory
# Solution 1: Reduce batch size
train_loader = DataLoader(dataset, batch_size=16)
# Solution 2: Clear cache
torch.cuda.empty_cache()
# Solution 3: Use gradient accumulation
accumulation_steps = 4
optimizer.zero_grad()
for i, (data, targets) in enumerate(train_loader):
    outputs = model(data)
    loss = criterion(outputs, targets) / accumulation_steps
    loss.backward()
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
# Gradient explosion
# Use gradient clipping (call between loss.backward() and optimizer.step())
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
# Check for NaN/Inf
if torch.isnan(loss) or torch.isinf(loss):
    print("NaN or Inf detected in loss")
# Device mismatch error
# Ensure all tensors are on same device
data = data.to(device)
targets = targets.to(device)
model = model.to(device)
# Memory leak
# Always use torch.no_grad() for inference
with torch.no_grad():
    outputs = model(data)
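To find which operation produced a NaN during the backward pass, enable anomaly detection; it adds significant overhead, so use it only while debugging:
# Pinpoint the op that created a NaN gradient (debug only)
torch.autograd.set_detect_anomaly(True)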
Conclusion
This comprehensive guide covers PyTorch installation and essential deep learning workflows. VCCLHOSTING GPU servers provide optimized PyTorch environments for maximum performance.