Files let your programs save data permanently. This tutorial covers reading and writing text files, working with JSON and CSV, and building systems that remember data between runs.
Without files, your program "forgets" everything when it closes:
high_score = 1000  # Lost when program ends!

# With files, data persists:
# Save high score
with open("highscore.txt", "w") as f:
    f.write(str(1000))

# Load high score later
with open("highscore.txt", "r") as f:
    high_score = int(f.read())

# Open for reading (default)
file = open("data.txt", "r")

# Open for writing (overwrites existing)
file = open("data.txt", "w")

# Open for appending (adds to end)
file = open("data.txt", "a")

# Open for reading and writing
file = open("data.txt", "r+")

# Always close when done!
file.close()

# Always use with - it automatically closes files:
# Manual close (error-prone)
file = open("data.txt", "r")
data = file.read()
file.close()  # Easy to forget!

# Automatic close (best practice)
with open("data.txt", "r") as file:
    data = file.read()
# File automatically closed here
# Write string to file (overwrites)
with open("output.txt", "w") as f:
    f.write("Hello, World!\n")
    f.write("This is line 2\n")

# Write multiple lines at once
lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
with open("output.txt", "w") as f:
    f.writelines(lines)

# Append to existing file
with open("log.txt", "a") as f:
    f.write("New log entry\n")

# Read entire file as string
with open("data.txt", "r") as f:
    content = f.read()
    print(content)

# Read line by line
with open("data.txt", "r") as f:
    for line in f:
        print(line.strip())  # Remove \n

# Read all lines into list
with open("data.txt", "r") as f:
    lines = f.readlines()  # ['line1\n', 'line2\n', ...]

# Read one line at a time
with open("data.txt", "r") as f:
    first_line = f.readline()
    second_line = f.readline()

import os
if os.path.exists("data.txt"):
    print("File exists!")
else:
    print("File not found")

# Check if it's a file (not directory)
if os.path.isfile("data.txt"):
    print("It's a file")

# Get file size
size = os.path.getsize("data.txt")
print(f"File size: {size} bytes")

import os
# Join paths (works on any OS)
path = os.path.join("data", "users", "alice.txt")
# Windows: data\users\alice.txt
# Unix: data/users/alice.txt

# Get directory name
directory = os.path.dirname("/path/to/file.txt")  # /path/to

# Get filename
filename = os.path.basename("/path/to/file.txt")  # file.txt

# Split extension
name, ext = os.path.splitext("document.txt")  # ('document', '.txt')

# Current directory
current = os.getcwd()

# Create directory
os.makedirs("data/users", exist_ok=True)  # Creates nested dirs

# JSON stores Python data structures in text format:
import json
# Data to save
user = {
    "name": "Alice",
    "age": 30,
    "email": "alice@example.com",
    "hobbies": ["reading", "coding"],
    "premium": True,
    "settings": {
        "theme": "dark",
        "notifications": False
    }
}

# Save to JSON file
with open("user.json", "w") as f:
    json.dump(user, f, indent=2)  # indent makes it readable

# Load from JSON file
with open("user.json", "r") as f:
    loaded_user = json.load(f)

print(loaded_user["name"])  # Alice
print(loaded_user["hobbies"])  # ['reading', 'coding']

import json
data = {"name": "Bob", "score": 95}

# Convert to JSON string
json_string = json.dumps(data)
print(json_string)  # {"name": "Bob", "score": 95}

# Parse JSON string
parsed = json.loads('{"name": "Charlie", "score": 87}')
print(parsed["name"])  # Charlie

# Perfect for spreadsheet-like data:
import csv
# Write CSV
data = [
    ["Name", "Age", "City"],
    ["Alice", 30, "NYC"],
    ["Bob", 25, "LA"],
    ["Charlie", 35, "Chicago"]
]
with open("users.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read CSV
# newline="" lets the csv module handle line endings itself, as on the write.
with open("users.csv", "r", newline="") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)  # ['Alice', '30', 'NYC']

# Skip header row
with open("users.csv", "r", newline="") as f:
    reader = csv.reader(f)
    next(reader)  # Skip header
    for row in reader:
        name, age, city = row
        print(f"{name} is {age} years old")

# More readable with column names:
import csv

# Write CSV from dictionaries
users = [
    {"name": "Alice", "age": 30, "city": "NYC"},
    {"name": "Bob", "age": 25, "city": "LA"}
]
with open("users.csv", "w", newline="") as f:
    fieldnames = ["name", "age", "city"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()  # Write column names
    writer.writerows(users)

# Read CSV as dictionaries
# newline="" lets the csv module handle line endings itself, as on the write.
with open("users.csv", "r", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["name"], row["age"])  # Access by column name

import json
import os
from datetime import datetime


class HighScoreManager:
    """Keep a high-score table in a JSON file.

    Each entry is a dict with "name", "score" and "timestamp" keys. The list
    is kept sorted by score (highest first), trimmed to the top 10, and
    written back to disk after every ``add_score`` call.
    """

    def __init__(self, filename="highscores.json"):
        """Remember *filename* and load any scores already stored there."""
        self.filename = filename
        self.scores = self.load_scores()

    def load_scores(self):
        """Load scores from file, return empty list if file doesn't exist."""
        # Try the open directly instead of checking existence first, so the
        # file cannot vanish between the check and the open.
        try:
            with open(self.filename, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return []

    def save_scores(self):
        """Save scores to file."""
        with open(self.filename, "w", encoding="utf-8") as f:
            json.dump(self.scores, f, indent=2)

    def add_score(self, name, score):
        """Add new score and save."""
        self.scores.append({
            "name": name,
            "score": score,
            "timestamp": str(datetime.now())
        })
        # Sort by score (highest first)
        self.scores.sort(key=lambda x: x["score"], reverse=True)
        # Keep only top 10
        self.scores = self.scores[:10]
        self.save_scores()

    def get_top_scores(self, n=10):
        """Return top N scores."""
        return self.scores[:n]

    def display_scores(self):
        """Print high scores table."""
        print("\n=== HIGH SCORES ===")
        for i, entry in enumerate(self.get_top_scores(), 1):
            print(f"{i}. {entry['name']}: {entry['score']}")
# Use it
from datetime import datetime

manager = HighScoreManager()
manager.add_score("Alice", 1500)
manager.add_score("Bob", 1200)
manager.add_score("Charlie", 1800)
manager.display_scores()

import json
import os
class Config:
    """JSON-backed settings store addressed with dot-separated paths.

    Values live in nested dicts (``self.data``). ``get("audio.volume")``
    walks the nesting; ``set`` creates missing intermediate sections and
    saves the file after every change.
    """

    def __init__(self, filename="config.json"):
        """Remember *filename* and load its contents over the defaults."""
        self.filename = filename
        self.data = self.load()

    def load(self):
        """Load config from file with defaults.

        Returns the built-in defaults when the file does not exist.
        Otherwise the saved values are merged over the defaults section by
        section, so a saved file that only contains part of a section still
        gets the remaining default keys of that section.

        Raises:
            TypeError: if the file's top-level JSON value is not an object.
        """
        defaults = {
            "window": {
                "width": 800,
                "height": 600,
                "fullscreen": False
            },
            "audio": {
                "volume": 80,
                "muted": False
            },
            "graphics": {
                "quality": "high",
                "vsync": True
            }
        }
        try:
            with open(self.filename, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return defaults
        if not isinstance(loaded, dict):
            raise TypeError(
                f"{self.filename} must contain a JSON object, "
                f"got {type(loaded).__name__}"
            )
        # Merge with defaults (in case new options added)
        return self._merge(defaults, loaded)

    @staticmethod
    def _merge(base, override):
        """Return *base* updated with *override*, recursing into nested dicts."""
        merged = dict(base)
        for key, value in override.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = Config._merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    def save(self):
        """Save current config."""
        with open(self.filename, "w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=2)

    def get(self, path, default=None):
        """Get config value using dot notation.

        Returns *default* when any component of *path* is missing or when an
        intermediate value is not a dict.
        """
        keys = path.split(".")
        value = self.data
        for key in keys:
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return default
        return value

    def set(self, path, value):
        """Set config value using dot notation.

        Missing intermediate sections are created as empty dicts. The config
        is written to disk after the assignment.
        """
        keys = path.split(".")
        target = self.data
        for key in keys[:-1]:
            if key not in target:
                target[key] = {}
            target = target[key]
        target[keys[-1]] = value
        self.save()
# Use it
config = Config()
print(config.get("window.width"))  # 800
config.set("audio.volume", 50)
config.set("graphics.quality", "medium")

import csv
class SalesAnalyzer:
    """Load a sales CSV and compute simple per-product summaries.

    The input file must have a header row containing at least the columns
    "product", "amount" and "quantity"; these are the only columns the
    class reads.
    """

    def __init__(self, filename):
        """Remember *filename* and load all of its rows into memory."""
        self.filename = filename
        self.data = self.load_data()

    def load_data(self):
        """Load sales data from CSV.

        Returns a list of row dicts with "amount" converted to float and
        "quantity" converted to int.
        """
        data = []
        # newline="" lets the csv module handle line endings itself, so
        # quoted fields containing newlines are parsed correctly.
        with open(self.filename, "r", newline="") as f:
            reader = csv.DictReader(f)
            for row in reader:
                row["amount"] = float(row["amount"])
                row["quantity"] = int(row["quantity"])
                data.append(row)
        return data

    def total_sales(self):
        """Calculate total sales amount."""
        return sum(row["amount"] for row in self.data)

    def sales_by_product(self):
        """Group sales by product.

        Returns a dict mapping each product to
        ``{"total": <summed amount>, "quantity": <summed quantity>}``.
        """
        products = {}
        for row in self.data:
            product = row["product"]
            if product not in products:
                products[product] = {"total": 0, "quantity": 0}
            products[product]["total"] += row["amount"]
            products[product]["quantity"] += row["quantity"]
        return products

    def top_products(self, n=5):
        """Get top N products by sales.

        Returns a list of ``(product, stats)`` pairs, highest total first.
        """
        products = self.sales_by_product()
        sorted_products = sorted(
            products.items(),
            key=lambda x: x[1]["total"],
            reverse=True
        )
        return sorted_products[:n]

    def export_summary(self, output_file):
        """Export summary to new CSV.

        Writes one row per product with the columns "product",
        "total_sales" and "units_sold".
        """
        products = self.sales_by_product()
        with open(output_file, "w", newline="") as f:
            fieldnames = ["product", "total_sales", "units_sold"]
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for product, stats in products.items():
                writer.writerow({
                    "product": product,
                    "total_sales": stats["total"],
                    "units_sold": stats["quantity"]
                })
# Use it
analyzer = SalesAnalyzer("sales.csv")
print(f"Total sales: ${analyzer.total_sales()}")
print("\nTop 5 products:")
for product, stats in analyzer.top_products(5):
    print(f" {product}: ${stats['total']:.2f}")
analyzer.export_summary("sales_summary.csv")

# For non-text data (images, executables, etc.):
# Read binary file
with open("image.png", "rb") as f:  # 'rb' = read binary
    data = f.read()

# Write binary file
with open("copy.png", "wb") as f:  # 'wb' = write binary
    f.write(data)

# Copy file
def copy_file(source, destination):
    """Copy the bytes of *source* to *destination*, overwriting it.

    The data is streamed in chunks rather than read into memory in one
    piece, so large files can be copied without holding them in RAM.
    """
    import shutil  # local import: this block does not rely on a file-level one

    with open(source, "rb") as src, open(destination, "wb") as dst:
        shutil.copyfileobj(src, dst)
copy_file("original.png", "backup.png")

# Don't load entire file into memory
with open("huge_log.txt", "r") as f:
    for line in f:
        if "ERROR" in line:
            print(line.strip())

import os
def safe_write(filename, content):
    """Write to temp file, then rename (atomic operation).

    *content* is written to ``filename + ".tmp"`` and flushed to disk, then
    the temp file is moved over *filename* with ``os.replace``. If anything
    fails along the way, the temp file is removed and the exception is
    re-raised, so an existing *filename* is left untouched.
    """
    temp_file = filename + ".tmp"
    try:
        # Write to temp file
        with open(temp_file, "w") as f:
            f.write(content)
            # Make sure the bytes are on disk before the rename exposes them.
            f.flush()
            os.fsync(f.fileno())
        # Rename temp file to real filename
        os.replace(temp_file, filename)
    except BaseException:
        # Don't leave a half-written temp file behind.
        try:
            os.remove(temp_file)
        except OSError:
            pass
        raise


import shutil
def write_with_backup(filename, content):
    """Create backup before overwriting.

    If *filename* already exists it is first copied to
    ``filename + ".backup"`` (replacing any earlier backup); then *content*
    is written to *filename*.
    """
    import shutil  # local import: this block does not rely on a file-level one

    # Attempt the copy directly instead of checking existence first, so the
    # file cannot disappear between the check and the copy.
    try:
        shutil.copy(filename, filename + ".backup")
    except FileNotFoundError:
        pass  # nothing to back up yet
    with open(filename, "w") as f:
        f.write(content)


# - with statement for automatic file closing
# - "r" for reading, "w" for writing, "a" for appending
# - os.path.join() for cross-platform paths