Python Tutorials

Choose a lesson to begin

File I/O and Data Persistence

Files let your programs save data permanently. This tutorial covers reading and writing text files, working with JSON and CSV, and building systems that remember data between runs.

Why File I/O Matters

Without files, your program "forgets" everything when it closes:

high_score = 1000  # Lost when program ends!

With files, data persists:

# Save high score
with open("highscore.txt", "w") as f:
    f.write(str(1000))

# Load high score later
with open("highscore.txt", "r") as f:
    high_score = int(f.read())  # file holds text, so convert back to int

Opening Files

# Each open() call below demonstrates a different mode.

# Open for reading (default)
file = open("data.txt", "r")

# Open for writing (overwrites existing)
file = open("data.txt", "w")

# Open for appending (adds to end)
file = open("data.txt", "a")

# Open for reading and writing
file = open("data.txt", "r+")

# Always close when done!
file.close()

The Better Way: Context Managers

Always use `with` — it closes the file automatically when the block ends, even if an error occurs inside it:

# Manual close (error-prone)
file = open("data.txt", "r")
data = file.read()
file.close()  # Easy to forget!

# Automatic close (best practice)
with open("data.txt", "r") as file:
    data = file.read()
# File automatically closed here

Writing Text Files

# Write string to file (overwrites)
with open("output.txt", "w") as f:
    f.write("Hello, World!\n")
    f.write("This is line 2\n")

# Write multiple lines at once
# (writelines does not add newlines, so each string carries its own "\n")
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as f:
    f.writelines(lines)

# Append to existing file
with open("log.txt", "a") as f:
    f.write("New log entry\n")

Reading Text Files

# Read entire file as string
with open("data.txt", "r") as f:
    content = f.read()
    print(content)

# Read line by line
with open("data.txt", "r") as f:
    for line in f:
        print(line.strip())  # Remove \n

# Read all lines into list
with open("data.txt", "r") as f:
    lines = f.readlines()  # ['line1\n', 'line2\n', ...]

# Read one line at a time
with open("data.txt", "r") as f:
    first_line = f.readline()
    second_line = f.readline()

Checking If File Exists

import os

# Check whether the path exists at all (file or directory)
if os.path.exists("data.txt"):
    print("File exists!")
else:
    print("File not found")

# Check if it's a file (not directory)
if os.path.isfile("data.txt"):
    print("It's a file")

# Get file size
size = os.path.getsize("data.txt")
print(f"File size: {size} bytes")

Working with Paths

import os

# Join paths (works on any OS)
path = os.path.join("data", "users", "alice.txt")
# Windows: data\users\alice.txt
# Unix: data/users/alice.txt

# Get directory name
directory = os.path.dirname("/path/to/file.txt")  # /path/to

# Get filename
filename = os.path.basename("/path/to/file.txt")  # file.txt

# Split extension
name, ext = os.path.splitext("document.txt")  # ('document', '.txt')

# Current directory
current = os.getcwd()

# Create directory
os.makedirs("data/users", exist_ok=True)  # Creates nested dirs

JSON - Saving Complex Data

JSON stores Python data structures in text format:

import json

# Data to save
user = {
    "name": "Alice",
    "age": 30,
    "email": "alice@example.com",
    "hobbies": ["reading", "coding"],
    "premium": True,
    "settings": {
        "theme": "dark",
        "notifications": False,
    },
}

# Save to JSON file
with open("user.json", "w") as f:
    json.dump(user, f, indent=2)  # indent makes it readable

# Load from JSON file
with open("user.json", "r") as f:
    loaded_user = json.load(f)

print(loaded_user["name"])  # Alice
print(loaded_user["hobbies"])  # ['reading', 'coding']

JSON String Conversion

import json

data = {"name": "Bob", "score": 95}

# Convert to JSON string
json_string = json.dumps(data)
print(json_string)  # {"name": "Bob", "score": 95}

# Parse JSON string
parsed = json.loads('{"name": "Charlie", "score": 87}')
print(parsed["name"])  # Charlie

CSV - Tabular Data

Perfect for spreadsheet-like data:

import csv

# Write CSV
data = [
    ["Name", "Age", "City"],
    ["Alice", 30, "NYC"],
    ["Bob", 25, "LA"],
    ["Charlie", 35, "Chicago"],
]
with open("users.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read CSV
# newline="" lets the csv module handle line endings itself, which keeps
# newlines embedded in quoted fields intact.
with open("users.csv", "r", newline="") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)  # ['Alice', '30', 'NYC']

# Skip header row
with open("users.csv", "r", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # Skip header
    for row in reader:
        name, age, city = row  # every value comes back as a string
        print(f"{name} is {age} years old")

CSV with Dictionaries

More readable with column names:

import csv

# Write CSV from dictionaries
users = [
    {"name": "Alice", "age": 30, "city": "NYC"},
    {"name": "Bob", "age": 25, "city": "LA"},
]
with open("users.csv", "w", newline="") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()  # Write column names
    writer.writerows(users)

# Read CSV as dictionaries
# newline="" lets the csv module handle line endings itself.
with open("users.csv", "r", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["name"], row["age"])  # Access by column name

Real-World Example: High Score System

import json
import os

class HighScoreManager:
    """Keep a high-score table in memory and persist it to a JSON file.

    Each entry is a dict with "name", "score" and "timestamp" keys. The table
    is kept sorted by score (highest first) and trimmed to ten entries.
    """

    def __init__(self, filename="highscores.json"):
        self.filename = filename
        self.scores = self.load_scores()

    def load_scores(self):
        """Load scores from file, return empty list if file doesn't exist."""
        if os.path.exists(self.filename):
            with open(self.filename, "r") as f:
                return json.load(f)
        return []

    def save_scores(self):
        """Save scores to file."""
        with open(self.filename, "w") as f:
            json.dump(self.scores, f, indent=2)

    def add_score(self, name, score):
        """Add new score and save."""
        # Imported locally so the class works regardless of what the calling
        # code has imported.
        from datetime import datetime

        self.scores.append({
            "name": name,
            "score": score,
            "timestamp": str(datetime.now()),
        })
        # Sort by score (highest first)
        self.scores.sort(key=lambda x: x["score"], reverse=True)
        # Keep only top 10
        self.scores = self.scores[:10]
        self.save_scores()

    def get_top_scores(self, n=10):
        """Return top N scores."""
        return self.scores[:n]

    def display_scores(self):
        """Print high scores table."""
        print("\n=== HIGH SCORES ===")
        for i, entry in enumerate(self.get_top_scores(), 1):
            print(f"{i}. {entry['name']}: {entry['score']}")

# Use it
from datetime import datetime

manager = HighScoreManager()
manager.add_score("Alice", 1500)
manager.add_score("Bob", 1200)
manager.add_score("Charlie", 1800)
manager.display_scores()

Real-World Example: Configuration System

import json
import os
class Config:
    """JSON-backed settings store addressed with dot-separated paths.

    Values are read with ``get("section.option")`` and written with
    ``set("section.option", value)``; every ``set`` saves the file.
    """

    def __init__(self, filename="config.json"):
        self.filename = filename
        self.data = self.load()

    @staticmethod
    def _deep_merge(base, override):
        """Return a new dict holding *base* recursively updated by *override*.

        When both sides hold a dict under the same key the two are merged
        key by key; otherwise the value from *override* wins.
        """
        merged = dict(base)
        for key, value in override.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = Config._deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    def load(self):
        """Load config from file with defaults."""
        defaults = {
            "window": {
                "width": 800,
                "height": 600,
                "fullscreen": False,
            },
            "audio": {
                "volume": 80,
                "muted": False,
            },
            "graphics": {
                "quality": "high",
                "vsync": True,
            },
        }
        if os.path.exists(self.filename):
            with open(self.filename, "r") as f:
                loaded = json.load(f)
            # Merge with defaults (in case new options added). The merge is
            # recursive so a file containing only part of a section keeps
            # the remaining defaults of that section.
            return self._deep_merge(defaults, loaded)
        return defaults

    def save(self):
        """Save current config."""
        with open(self.filename, "w") as f:
            json.dump(self.data, f, indent=2)

    def get(self, path, default=None):
        """Get config value using dot notation.

        Returns *default* when any component of *path* is missing.
        """
        keys = path.split(".")
        value = self.data
        for key in keys:
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return default
        return value

    def set(self, path, value):
        """Set config value using dot notation.

        Missing intermediate sections are created, then the file is saved.
        """
        keys = path.split(".")
        target = self.data
        for key in keys[:-1]:
            if key not in target:
                target[key] = {}
            target = target[key]
        target[keys[-1]] = value
        self.save()

# Use it
config = Config()
print(config.get("window.width"))  # 800
config.set("audio.volume", 50)
config.set("graphics.quality", "medium")

Real-World Example: CSV Data Processor

import csv
class SalesAnalyzer:
    """Load sales rows from a CSV file and compute per-product summaries.

    The input file needs "product", "amount" and "quantity" columns; these
    are the columns the methods below read.
    """

    def __init__(self, filename):
        self.filename = filename
        self.data = self.load_data()

    def load_data(self):
        """Load sales data from CSV.

        Returns a list of row dicts with "amount" converted to float and
        "quantity" converted to int.
        """
        data = []
        # newline="" lets the csv module handle line endings itself, so
        # newlines embedded in quoted fields are preserved.
        with open(self.filename, "r", newline="") as f:
            reader = csv.DictReader(f)
            for row in reader:
                row["amount"] = float(row["amount"])
                row["quantity"] = int(row["quantity"])
                data.append(row)
        return data

    def total_sales(self):
        """Calculate total sales amount."""
        return sum(row["amount"] for row in self.data)

    def sales_by_product(self):
        """Group sales by product.

        Returns a dict mapping product name to
        ``{"total": <summed amount>, "quantity": <summed quantity>}``.
        """
        products = {}
        for row in self.data:
            product = row["product"]
            if product not in products:
                products[product] = {"total": 0, "quantity": 0}
            products[product]["total"] += row["amount"]
            products[product]["quantity"] += row["quantity"]
        return products

    def top_products(self, n=5):
        """Get top N products by sales.

        Returns a list of ``(product, stats)`` pairs, highest total first.
        """
        products = self.sales_by_product()
        sorted_products = sorted(
            products.items(),
            key=lambda x: x[1]["total"],
            reverse=True,
        )
        return sorted_products[:n]

    def export_summary(self, output_file):
        """Export summary to new CSV."""
        products = self.sales_by_product()

        with open(output_file, "w", newline="") as f:
            fieldnames = ["product", "total_sales", "units_sold"]
            writer = csv.DictWriter(f, fieldnames=fieldnames)

            writer.writeheader()
            for product, stats in products.items():
                writer.writerow({
                    "product": product,
                    "total_sales": stats["total"],
                    "units_sold": stats["quantity"],
                })

# Use it
analyzer = SalesAnalyzer("sales.csv")
print(f"Total sales: ${analyzer.total_sales()}")

print("\nTop 5 products:")
for product, stats in analyzer.top_products(5):
    print(f" {product}: ${stats['total']:.2f}")

analyzer.export_summary("sales_summary.csv")

Binary Files

For non-text data (images, executables, etc.):

# Read binary file
with open("image.png", "rb") as f:  # 'rb' = read binary
    data = f.read()

# Write binary file
with open("copy.png", "wb") as f:  # 'wb' = write binary
    f.write(data)

# Copy file
def copy_file(source, destination):
    """Copy the bytes of *source* into *destination*.

    Both paths are opened in binary mode; *destination* is created or
    overwritten. The data is streamed in chunks, so large files are not
    loaded into memory in one piece.
    """
    import shutil  # local import: this snippet has no import section

    with open(source, "rb") as src, open(destination, "wb") as dst:
        shutil.copyfileobj(src, dst)


copy_file("original.png", "backup.png")

File Processing Patterns

Process Large File Line by Line

# Stream the log one line at a time instead of reading it all into memory
with open("huge_log.txt", "r") as log:
    for entry in log:
        if "ERROR" not in entry:
            continue  # only error lines are reported
        print(entry.strip())

Safe File Writing

import os
def safe_write(filename, content):
    """Write to temp file, then rename over the real file.

    The content first goes to ``filename + ".tmp"``. Only after that write
    has finished is the temp file moved over *filename* with ``os.replace``,
    so a failed write never leaves a half-written *filename*.

    If anything fails, the temp file is removed and the original exception
    is re-raised.
    """
    temp_file = filename + ".tmp"
    try:
        # Write to temp file
        with open(temp_file, "w") as f:
            f.write(content)
        # Rename temp file to real filename
        os.replace(temp_file, filename)
    except BaseException:
        # Best-effort cleanup so a failed write leaves no stale .tmp file;
        # the original error is what the caller needs to see.
        try:
            os.remove(temp_file)
        except OSError:
            pass
        raise

Backup Before Writing

import shutil
def write_with_backup(filename, content):
    """Overwrite *filename* with *content*, keeping the old version.

    When *filename* already exists it is first copied to
    ``filename + ".backup"``; the new content is then written.
    """
    had_previous_version = os.path.exists(filename)
    if had_previous_version:
        shutil.copy(src=filename, dst=filename + ".backup")

    with open(filename, "w") as out:
        out.write(content)

Try It Out!

  • Build a simple note-taking app that saves notes to JSON
  • Create a CSV-based contact manager (add/search/delete)
  • Implement a save game system for a text adventure
  • Write a log analyzer that processes large log files
  • Build a settings manager with JSON persistence
Key Takeaways

    File I/O lets your programs remember and share data! 🐍

    🐍 Python Runner