Linux System Troubleshooting
This guide covers essential troubleshooting commands for diagnosing and resolving Linux system issues.
System Performance Troubleshooting
CPU Usage Diagnosis
# Real-time CPU monitoring
top
htop
glances
# CPU usage per process
ps aux --sort=-%cpu | head -10
ps -eo pid,ppid,cmd,%mem,%cpu --sort=-%cpu | head
# Load average
uptime
cat /proc/loadavg
# Per-CPU statistics
mpstat -P ALL 1
sar -u 1 10
# Find high CPU processes
pidstat 1
Memory Issues
# Memory usage overview
free -h
free -m
cat /proc/meminfo
# Memory usage by process
ps aux --sort=-%mem | head -10
pmap -x PID
# Watch memory usage over time (steadily shrinking free memory may indicate a leak)
vmstat 1
sar -r 1 10
# OOM killer logs
dmesg | grep -i "out of memory"
grep -i "killed process" /var/log/messages
# Drop page cache, dentries, and inodes (emergency only; must be run from a root shell)
sync
echo 3 > /proc/sys/vm/drop_caches
Disk I/O Problems
# Disk I/O statistics
iostat -x 1
iotop
# Per-process I/O
iotop -o
pidstat -d 1
# Disk usage
df -h
df -i # Inode usage
du -sh /*
du -h --max-depth=1 / | sort -hr
# Find large files
find / -type f -size +100M
find /var -type f -exec du -h {} + | sort -rh | head -20
# Check disk health
smartctl -a /dev/sda
badblocks -v /dev/sda
Service and Process Troubleshooting
Service Issues
# Check service status
systemctl status service_name
systemctl is-active service_name
systemctl is-enabled service_name
systemctl is-failed service_name
# List failed services
systemctl --failed
systemctl list-units --state=failed
# Service logs
journalctl -u service_name
journalctl -u service_name -f
journalctl -u service_name --since "1 hour ago"
journalctl -u service_name -p err
# Restart service, then show its full (untruncated) status output
systemctl restart service_name
systemctl status -l service_name
Process Debugging
# Find zombie processes
ps aux | grep Z
ps -eo stat,ppid,pid,cmd | grep -e '^[Zz]'
# Trace system calls
strace -p PID
strace -f -e open command
ltrace command
# Debug with lsof
lsof -p PID
lsof -u username
lsof -i :80
lsof | grep deleted # Find deleted but open files
# Check open files limits (per-process and system-wide) and the current open-file count
ulimit -n
cat /proc/sys/fs/file-max
lsof | wc -l
Network Troubleshooting
Connectivity Issues
# Test basic connectivity
ping -c 4 google.com
ping -c 4 8.8.8.8
# Trace route path
traceroute google.com
traceroute -n google.com
mtr google.com
# DNS resolution
nslookup google.com
dig google.com
host google.com
# Check listening ports
netstat -tulpn
ss -tulpn
lsof -i -P -n
# Check established connections
netstat -anp | grep ESTABLISHED
ss -tnp
Network Interface Issues
# Interface status
ip link show
ip addr show
ethtool eth0
# Interface statistics
ip -s link show eth0
ifconfig eth0
cat /proc/net/dev
# Packet errors
netstat -i
ip -s -s link show
# Check cable connection
ethtool eth0 | grep "Link detected"
mii-tool eth0
# Reset interface
ip link set eth0 down
ip link set eth0 up
Firewall Debugging
# Check iptables rules
iptables -L -n -v
iptables -t nat -L -n -v
# Check UFW status
ufw status verbose
ufw show added
# Check firewalld
firewall-cmd --list-all
firewall-cmd --get-active-zones
# Test port accessibility
nc -zv hostname 80
telnet hostname 80
nmap -p 80 hostname
# Capture packets
tcpdump -i any port 80
tcpdump -i eth0 -w capture.pcap
Boot and System Issues
Boot Problems
# Check boot logs
journalctl -b
journalctl -b -1 # Previous boot
dmesg
dmesg | grep -i error
# Inspect the GRUB configuration, then regenerate it (update-grub rewrites grub.cfg)
cat /boot/grub/grub.cfg
update-grub
# Check fstab errors
cat /etc/fstab
mount -a
findmnt --verify
# Rescue mode
# Boot with: init=/bin/bash
mount -o remount,rw /
passwd root
sync
reboot -f
Kernel Issues
# Current kernel
uname -r
uname -a
# Available kernels
dpkg --list | grep linux-image
rpm -qa | grep kernel
# Kernel messages
dmesg | less
dmesg -T | grep -i error
# Kernel parameters
sysctl -a
cat /proc/cmdline
# List loaded modules, load a module, and show module information
lsmod
modprobe module_name
modinfo module_name
Disk and Filesystem Issues
Filesystem Errors
# Check filesystem
fsck /dev/sda1 # Unmounted only
e2fsck -f /dev/sda1
xfs_repair /dev/sda1
# Check for bad blocks
badblocks -v /dev/sda1
# Mount issues
mount -a
mount | column -t
findmnt
# Check disk for errors
smartctl -a /dev/sda
smartctl -H /dev/sda
# Inode exhaustion
df -i
find / -xdev -type f | cut -d "/" -f 2 | sort | uniq -c | sort -nr
Disk Full Issues
# Find what's using space
du -sh /* | sort -hr
ncdu /
# Find large files
find / -type f -size +1G
find /var -type f -size +100M -exec ls -lh {} \;
# Find deleted files still open
lsof | grep deleted
lsof +L1
# Check log file sizes and trim the systemd journal (vacuum deletes old archived entries)
du -sh /var/log/*
journalctl --vacuum-size=100M
journalctl --vacuum-time=7d
# Clean package cache
apt clean # Ubuntu/Debian
yum clean all # CentOS/RHEL
User and Permission Issues
Permission Problems
# Check file permissions
ls -la filename
stat filename
namei -l /path/to/file
# Find files with specific permissions
find / -perm 777
find / -perm -4000 # SUID files
# Check user permissions
id username
groups username
sudo -l -U username
# Fix ownership and reset permissions (755 for directories, 644 for files)
chown -R user:group /directory
find /path -type d -exec chmod 755 {} \;
find /path -type f -exec chmod 644 {} \;
Login Issues
# Check failed logins
lastb
faillog -a
# Check successful logins
last
lastlog
# PAM configuration
cat /etc/pam.d/common-auth
cat /etc/pam.d/sshd
# SSH debugging
ssh -vvv user@host
tail -f /var/log/auth.log
# Password issues
passwd -S username
chage -l username
Application-Specific Troubleshooting
Web Server Issues
# Apache/Nginx logs
tail -f /var/log/apache2/error.log
tail -f /var/log/nginx/error.log
# Test configuration
apache2ctl configtest
nginx -t
# Check web server status
systemctl status apache2
systemctl status nginx
# Check listening ports
netstat -tlnp | grep :80
ss -tlnp | grep :80
Database Issues
# MySQL/MariaDB logs
tail -f /var/log/mysql/error.log
# PostgreSQL logs
tail -f /var/log/postgresql/postgresql-13-main.log
# Check database status
systemctl status mysql
systemctl status postgresql
# Connection test
mysql -u root -p
psql -U postgres
# Check processes
ps aux | grep mysql
ps aux | grep postgres
Log File Analysis
System Logs
# View system logs
journalctl -xe
journalctl -f
journalctl --since "1 hour ago"
journalctl --since "2024-01-01" --until "2024-01-02"
# Traditional logs
tail -f /var/log/syslog
tail -f /var/log/messages
grep -i error /var/log/syslog
# Boot logs
journalctl -b
dmesg
# Kernel logs
dmesg | grep -i error
dmesg -T | less
Application Logs
# Find log files
find /var/log -type f -name "*.log"
ls -lht /var/log | head -20
# Search in logs
grep -r "error" /var/log/
zgrep "pattern" /var/log/*.gz
# Real-time monitoring
tail -f /var/log/syslog
multitail /var/log/syslog /var/log/auth.log
# Analyze logs
cat /var/log/syslog | grep -i "error" | wc -l
awk '/error/ {print}' /var/log/syslog
Performance Analysis Tools
System Profiling
# CPU profiling
perf top
perf record -a sleep 10
perf report
# System calls
strace -c command
strace -T command
# IO profiling
iotop
iostat -x 1
# Network profiling
iftop
nethogs
Comprehensive Monitoring
# All-in-one monitoring
glances
atop
nmon
# Resource usage
sar -A
vmstat 1
mpstat -P ALL 1
# Custom monitoring
watch -n 1 "ps aux | grep process"
watch -n 1 "df -h"
Emergency Recovery
System Recovery
# Boot into single-user mode
# Add to kernel line: single or init=/bin/bash
# Remount root read-write
mount -o remount,rw /
# Check and repair filesystem
fsck -y /dev/sda1
# Reset root password
passwd root
# Fix GRUB
grub-install /dev/sda
update-grub
# Reinstall packages
apt install --reinstall package # Ubuntu
yum reinstall package # CentOS
Conclusion
This comprehensive troubleshooting guide covers the most common Linux system issues. VCCLHOSTING provides 24/7 expert support to help diagnose and resolve any server problems quickly.