Backup & Restore

pycopg provides convenient wrappers around PostgreSQL’s backup and restore tools.

pg_dump

Full Database Backup

# Custom format (compressed, recommended)
db.pg_dump("backup.dump")

# Plain SQL format
db.pg_dump("backup.sql", format="plain")

# Directory format (for parallel backup)
db.pg_dump("backup_dir", format="directory")

# Tar format
db.pg_dump("backup.tar", format="tar")

Backup Options

db.pg_dump(
    "backup.dump",
    format="custom",      # 'plain', 'custom', 'directory', 'tar'
    schema_only=False,    # Only schema, no data
    data_only=False,      # Only data, no schema
    tables=None,          # List of tables to include
    exclude_tables=None,  # List of tables to exclude
    schemas=None,         # List of schemas to include
    compress=6,           # Compression level (0-9, custom format)
    jobs=1,               # Parallel jobs (directory format)
)

Schema Only

# Export only table definitions
db.pg_dump("schema.sql", format="plain", schema_only=True)

Data Only

# Export only data (for tables that already exist)
db.pg_dump("data.dump", data_only=True)

Specific Tables

# Backup specific tables
db.pg_dump("users.dump", tables=["users", "user_profiles", "user_settings"])

# Exclude tables
db.pg_dump("backup.dump", exclude_tables=["logs", "sessions", "temp_data"])

Specific Schemas

# Backup specific schemas
db.pg_dump("app.dump", schemas=["app", "auth"])

Parallel Backup

# Use directory format with multiple jobs
db.pg_dump("backup_dir", format="directory", jobs=4)

pg_restore

Full Restore

# Restore from custom format
db.pg_restore("backup.dump")

# From directory format
db.pg_restore("backup_dir")

Restore Options

db.pg_restore(
    "backup.dump",
    clean=False,           # Drop objects before recreating
    if_exists=True,        # Use IF EXISTS with clean
    create=False,          # Create database before restoring
    data_only=False,       # Restore only data
    schema_only=False,     # Restore only schema
    tables=None,           # Only restore these tables
    schemas=None,          # Only restore these schemas
    jobs=1,                # Parallel jobs
    no_owner=False,        # Don't restore ownership
    no_privileges=False,   # Don't restore privileges
)

Clean Restore

# Drop and recreate all objects
db.pg_restore("backup.dump", clean=True, if_exists=True)

Parallel Restore

# Restore with multiple jobs
db.pg_restore("backup_dir", jobs=4)

Restore Specific Tables

# Restore only certain tables
db.pg_restore("backup.dump", tables=["users", "orders"])

Restore Without Ownership

# Useful when restoring to a different environment
db.pg_restore("backup.dump", no_owner=True, no_privileges=True)

Plain SQL Restore

For plain SQL backups, pycopg uses psql automatically.

db.pg_restore("backup.sql")  # Automatically uses psql for .sql files

CSV Export/Import

Export to CSV

# Basic export
rows = db.copy_to_csv("users", "users.csv")
print(f"Exported {rows} rows")

# With options
db.copy_to_csv(
    "users",
    "users.csv",
    schema="public",
    columns=["id", "name", "email"],  # Specific columns
    delimiter=",",
    header=True,
    null_string="",
    encoding="UTF8",
)

Import from CSV

# Basic import
rows = db.copy_from_csv("users", "users.csv")
print(f"Imported {rows} rows")

# With options
db.copy_from_csv(
    "users",
    "users.csv",
    schema="public",
    columns=["id", "name", "email"],  # Map to specific columns
    delimiter=",",
    header=True,
    null_string="",
    encoding="UTF8",
)

Complete Backup Strategy

Development Backup

from pycopg import Database
from datetime import datetime

db = Database.from_env()

# Daily backup with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
db.pg_dump(f"backups/dev_{timestamp}.dump")

# Keep only schema in version control
db.pg_dump("migrations/schema.sql", format="plain", schema_only=True)

db.close()

Production Backup Script

from pycopg import Database
from datetime import datetime
from pathlib import Path
import os

def backup_database():
    db = Database.from_env()

    # Create backup directory
    backup_dir = Path("backups")
    backup_dir.mkdir(exist_ok=True)

    # Generate filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_file = backup_dir / f"prod_{timestamp}.dump"

    # Full backup (parallel for speed)
    db.pg_dump(
        str(backup_file),
        format="custom",
        compress=9,
    )

    # Verify backup size
    size = backup_file.stat().st_size
    print(f"Backup created: {backup_file} ({size / 1024 / 1024:.1f} MB)")

    # Clean old backups (keep last 7 days)
    cutoff = datetime.now().timestamp() - (7 * 24 * 60 * 60)
    for old_backup in backup_dir.glob("prod_*.dump"):
        if old_backup.stat().st_mtime < cutoff:
            old_backup.unlink()
            print(f"Deleted old backup: {old_backup}")

    db.close()

if __name__ == "__main__":
    backup_database()

Migration with Backup

from pycopg import Database, Migrator
from datetime import datetime

def safe_migrate():
    db = Database.from_env()
    migrator = Migrator(db, "migrations/")

    # Check for pending migrations
    pending = migrator.pending()
    if not pending:
        print("No pending migrations")
        return

    print(f"Found {len(pending)} pending migrations")

    # Backup before migration
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_file = f"backups/pre_migration_{timestamp}.dump"
    print(f"Creating backup: {backup_file}")
    db.pg_dump(backup_file)

    # Run migrations
    try:
        applied = migrator.migrate()
        for m in applied:
            print(f"Applied: {m}")
        print("Migration successful!")
    except Exception as e:
        print(f"Migration failed: {e}")
        print(f"Restore from: {backup_file}")
        raise

    db.close()

Clone Database

from pycopg import Database
import tempfile
import os

def clone_database(source_url: str, target_url: str):
    source_db = Database.from_url(source_url)
    target_db = Database.from_url(target_url)

    # Create temporary backup
    with tempfile.NamedTemporaryFile(suffix=".dump", delete=False) as f:
        backup_file = f.name

    try:
        # Backup source
        print("Backing up source database...")
        source_db.pg_dump(backup_file)

        # Restore to target (clean = drop existing objects)
        print("Restoring to target database...")
        target_db.pg_restore(backup_file, clean=True, no_owner=True)

        print("Clone complete!")
    finally:
        os.unlink(backup_file)
        source_db.close()
        target_db.close()

Error Handling

try:
    db.pg_dump("backup.dump")
except RuntimeError as e:
    print(f"Backup failed: {e}")
    # Handle error (e.g., retry, alert)

try:
    db.pg_restore("backup.dump")
except RuntimeError as e:
    print(f"Restore failed: {e}")
    # Handle error

Best Practices

1. Use Custom Format for Production

# Custom format is compressed and supports parallel restore
db.pg_dump("backup.dump", format="custom")

2. Test Restores Regularly

# Create test database
db.create_database("test_restore")

# Restore to test
test_db = Database(db.config.with_database("test_restore"))
test_db.pg_restore("backup.dump")

# Verify data
users = test_db.execute("SELECT COUNT(*) FROM users")
print(f"Users: {users[0]['count']}")

# Cleanup
test_db.close()
db.drop_database("test_restore")

3. Include Backup in CI/CD

# GitHub Actions example
- name: Backup before deploy
  run: |
    python -c "
    from pycopg import Database
    db = Database.from_env()
    db.pg_dump('pre_deploy_backup.dump')
    "

4. Monitor Backup Size

from pathlib import Path

backup_file = Path("backup.dump")
db.pg_dump(str(backup_file))

size_mb = backup_file.stat().st_size / 1024 / 1024
print(f"Backup size: {size_mb:.1f} MB")

# Alert if size changed significantly
if size_mb < expected_min_size:
    raise ValueError("Backup too small - possible data loss!")