跳过正文

Python 基础速查(运维向)

·1578 字·8 分钟·
目录

变量与类型
#

Python 是动态类型语言,变量不需要声明类型。运维脚本中最常用的类型:

# 基本类型
hostname = "web-01.prod"          # str
port = 8080                        # int
threshold = 0.85                   # float
is_healthy = True                  # bool
pid = None                         # NoneType

# 类型转换
port_str = str(port)               # "8080"
port_num = int("9090")             # 9090
ratio = float("0.75")              # 0.75

# 查看类型
print(type(hostname))              # <class 'str'>
print(isinstance(port, int))       # True

# 多重赋值
host, port, proto = "localhost", 3306, "tcp"
a = b = c = 0                      # 全部赋值为 0

字符串格式化
#

f-string 是最推荐的方式,Python 3.6+:

host = "db-primary"
port = 5432
latency_ms = 12.456

# f-string(推荐)
msg = f"连接 {host}:{port},延迟 {latency_ms:.1f}ms"
# 连接 db-primary:5432,延迟 12.5ms

# 格式控制
print(f"{host:>20}")               # 右对齐,宽度20
print(f"{port:05d}")               # 补零:05432
print(f"{latency_ms:.2f}")        # 保留2位小数:12.46
print(f"{1024 * 1024:,}")         # 千分位:1,048,576

# 多行 f-string
report = (
    f"Host  : {host}\n"
    f"Port  : {port}\n"
    f"Status: {'UP' if latency_ms < 100 else 'SLOW'}"
)

# 常用字符串方法
url = "  https://API.Example.COM/v1/health  "
print(url.strip())                 # 去首尾空白
print(url.lower())                 # 转小写
print(url.upper())                 # 转大写
print(url.replace("https", "http"))
print(url.split("/"))              # 按分隔符切分
print(",".join(["a", "b", "c"]))  # 拼接
print(url.startswith("  https"))   # True
print(url.endswith("health  "))    # True
print("API" in url)                # True

# 字符串解析
line = "2025-12-09 10:23:45 ERROR web-01 connection refused"
parts = line.split(maxsplit=4)     # 最多切4刀
date, time, level, host, msg = parts

列表、字典、集合
#

# ===== 列表 =====
servers = ["web-01", "web-02", "db-01"]

servers.append("cache-01")         # 追加
servers.insert(0, "lb-01")         # 指定位置插入
servers.remove("db-01")            # 删除指定值
popped = servers.pop()             # 弹出末尾
servers.sort()                     # 原地排序
sorted_srv = sorted(servers)       # 返回新列表
servers.reverse()                  # 原地翻转
print(len(servers))                # 长度
print("web-01" in servers)        # 成员判断

# 切片
first_two = servers[:2]
last_one = servers[-1:]
reversed_list = servers[::-1]      # 翻转副本

# ===== 字典 =====
server_info = {
    "host": "web-01",
    "ip": "10.0.1.10",
    "port": 80,
    "tags": ["nginx", "prod"],
}

# 读取
host = server_info["host"]
port = server_info.get("port", 80)           # 带默认值
region = server_info.get("region", "unknown")

# 修改
server_info["status"] = "healthy"
server_info.update({"version": "1.2.3", "weight": 100})

# 遍历
for key, val in server_info.items():
    print(f"  {key}: {val}")

keys = list(server_info.keys())
vals = list(server_info.values())

# 删除
del server_info["weight"]
popped_val = server_info.pop("version", None)

# 嵌套字典
inventory = {
    "web-01": {"ip": "10.0.1.10", "cpu": 4, "mem_gb": 8},
    "web-02": {"ip": "10.0.1.11", "cpu": 4, "mem_gb": 16},
}
print(inventory["web-01"]["ip"])

# ===== 集合 =====
healthy = {"web-01", "web-02", "db-01"}
degraded = {"web-02", "cache-01"}

print(healthy & degraded)          # 交集:{'web-02'}
print(healthy | degraded)          # 并集
print(healthy - degraded)          # 差集(只在healthy中)
print(healthy ^ degraded)          # 对称差集

# 去重
hosts_with_dup = ["web-01", "web-02", "web-01", "db-01"]
unique_hosts = list(set(hosts_with_dup))

控制流与推导式
#

# ===== 条件 =====
status_code = 503

if status_code == 200:
    print("OK")
elif status_code in (301, 302):
    print("重定向")
elif 500 <= status_code < 600:
    print(f"服务端错误: {status_code}")
else:
    print("未知状态")

# 三元表达式
label = "健康" if status_code == 200 else "异常"

# ===== 循环 =====
servers = ["web-01", "web-02", "db-01"]

for srv in servers:
    print(srv)

for i, srv in enumerate(servers, start=1):
    print(f"{i}. {srv}")

# zip 同步迭代
ips = ["10.0.1.10", "10.0.1.11", "10.0.2.10"]
for srv, ip in zip(servers, ips):
    print(f"{srv} -> {ip}")

# while
retries = 0
max_retries = 3
while retries < max_retries:
    # ... 尝试连接 ...
    retries += 1

# break / continue
for srv in servers:
    if srv.startswith("db"):
        continue                   # 跳过数据库节点
    if srv == "web-02":
        break                      # 提前退出

# ===== 推导式 =====
# 列表推导式
web_servers = [s for s in servers if s.startswith("web")]

# 字典推导式
port_map = {srv: 80 for srv in servers}
upper_map = {k: v.upper() for k, v in {"a": "x", "b": "y"}.items()}

# 集合推导式
prefixes = {srv.split("-")[0] for srv in servers}  # {'web', 'db'}

# 生成器表达式(不构建列表,节省内存)
total = sum(len(s) for s in servers)
any_down = any(s.endswith("-bad") for s in servers)
all_prod = all("prod" in s for s in servers)

函数
#

from typing import Optional, List, Dict, Any

# 基础函数
def check_port(host: str, port: int) -> bool:
    """检查端口是否可达。"""
    import socket
    try:
        with socket.create_connection((host, port), timeout=3):
            return True
    except (socket.timeout, ConnectionRefusedError, OSError):
        return False


# 默认参数
def retry(func, max_attempts: int = 3, delay: float = 1.0):
    import time
    for attempt in range(1, max_attempts + 1):
        try:
            return func()
        except Exception as e:
            if attempt == max_attempts:
                raise
            time.sleep(delay)


# *args 和 **kwargs
def log_event(level: str, *messages: str, **context: Any) -> None:
    """
    log_event("INFO", "服务启动", "监听端口", host="web-01", port=8080)
    """
    msg = " ".join(messages)
    ctx = " ".join(f"{k}={v}" for k, v in context.items())
    print(f"[{level}] {msg} | {ctx}")


# 仅关键字参数(* 后面的参数必须用关键字传递)
def deploy(service: str, *, version: str, env: str = "prod") -> None:
    print(f"部署 {service} v{version}{env}")


deploy("api", version="1.2.3")           # 正确
# deploy("api", "1.2.3")                 # 报错

# lambda(适合简单的单行函数)
servers = [
    {"name": "web-02", "weight": 50},
    {"name": "web-01", "weight": 100},
]
servers.sort(key=lambda s: s["weight"], reverse=True)

# 返回多个值(实际是元组)
def parse_endpoint(endpoint: str) -> tuple[str, int]:
    host, port_str = endpoint.rsplit(":", 1)
    return host, int(port_str)

host, port = parse_endpoint("10.0.1.10:8080")

异常处理
#

import socket
import json
from pathlib import Path

# 基本结构
def load_config(path: str) -> dict:
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"配置文件不存在: {path}")
        return {}
    except json.JSONDecodeError as e:
        print(f"JSON 解析失败: {e}")
        return {}
    except PermissionError:
        print(f"无权限读取: {path}")
        raise                          # 重新抛出
    finally:
        print("load_config 执行完毕")  # 无论如何都执行


# 捕获多个异常
def connect(host: str, port: int) -> socket.socket:
    try:
        s = socket.create_connection((host, port), timeout=5)
        return s
    except (socket.timeout, TimeoutError):
        raise RuntimeError(f"连接 {host}:{port} 超时")
    except ConnectionRefusedError:
        raise RuntimeError(f"{host}:{port} 拒绝连接")
    except OSError as e:
        raise RuntimeError(f"网络错误: {e}") from e


# 自定义异常
class DeployError(Exception):
    def __init__(self, service: str, reason: str):
        self.service = service
        self.reason = reason
        super().__init__(f"部署失败 [{service}]: {reason}")


class RollbackError(DeployError):
    pass


def deploy_service(service: str, version: str) -> None:
    if not version.startswith("v"):
        raise DeployError(service, f"版本格式错误: {version}")
    # ... 部署逻辑 ...


# 使用 contextlib.suppress 忽略特定异常
from contextlib import suppress

with suppress(FileNotFoundError):
    Path("/tmp/old-lock").unlink()     # 文件不存在时静默跳过

文件操作
#

from pathlib import Path
import json
import yaml  # pip install pyyaml

# ===== pathlib(推荐)=====
p = Path("/var/log/nginx")

# 路径构造
log_file = p / "access.log"
config = Path.home() / ".config" / "myapp" / "config.yaml"

# 路径信息
print(log_file.name)               # "access.log"
print(log_file.stem)               # "access"
print(log_file.suffix)             # ".log"
print(log_file.parent)             # PosixPath('/var/log/nginx')
print(log_file.exists())
print(log_file.is_file())
print(log_file.is_dir())
size = log_file.stat().st_size     # 文件大小(字节)

# 创建目录
config.parent.mkdir(parents=True, exist_ok=True)

# 读写文件
text = log_file.read_text(encoding="utf-8")
log_file.write_text("内容", encoding="utf-8")

# 遍历目录
log_dir = Path("/var/log")
for f in log_dir.iterdir():
    print(f)

# glob 匹配
for log in log_dir.glob("*.log"):
    print(log)
for log in log_dir.rglob("error*.log"):   # 递归
    print(log)

# ===== with open(传统写法)=====
# 读文本
with open("/etc/hosts") as f:
    lines = f.readlines()          # 全部行列表
    # 或者逐行迭代(大文件推荐)
    # for line in f: ...

# 写文本
with open("/tmp/report.txt", "w", encoding="utf-8") as f:
    f.write("第一行\n")
    f.writelines(["line2\n", "line3\n"])

# 追加
with open("/tmp/report.txt", "a") as f:
    f.write("追加内容\n")

# ===== JSON =====
data = {"host": "web-01", "port": 80}

# 写
with open("/tmp/data.json", "w") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# 读
with open("/tmp/data.json") as f:
    loaded = json.load(f)

# 字符串互转
json_str = json.dumps(data, ensure_ascii=False)
parsed = json.loads(json_str)

# ===== YAML =====
# 读
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)         # 用 safe_load,不用 load

# 写
with open("output.yaml", "w") as f:
    yaml.dump(data, f, allow_unicode=True, default_flow_style=False)

类型注解基础
#

Python 3.9+ 内置泛型,3.10+ 支持 X | Y 联合类型:

from typing import Optional, Union, Callable
from collections.abc import Iterator, Generator

# 基本注解
def ping(host: str, count: int = 4) -> bool: ...

# 容器类型(Python 3.9+)
def filter_servers(servers: list[str], tag: str) -> list[str]: ...
def get_inventory() -> dict[str, dict]: ...

# 可选参数(Python 3.10+ 用 X | None)
def connect(host: str, proxy: str | None = None) -> None: ...
# 等价于:
def connect2(host: str, proxy: Optional[str] = None) -> None: ...

# Union(Python 3.10+ 用 X | Y)
def parse_port(val: str | int) -> int:
    return int(val)

# 可调用类型
def run_with_retry(fn: Callable[[], bool], retries: int) -> bool:
    for _ in range(retries):
        if fn():
            return True
    return False

# TypedDict(结构化字典)
from typing import TypedDict

class ServerInfo(TypedDict):
    host: str
    port: int
    healthy: bool

def check(info: ServerInfo) -> str:
    return f"{info['host']}:{info['port']}"

模块与包
#

# 导入方式
import os
import os.path
from pathlib import Path
from subprocess import run, PIPE, CalledProcessError
from typing import Optional
import json as j                          # 别名

# 条件导入(兼容)
try:
    import ujson as json                  # 更快的JSON库
except ImportError:
    import json

# __name__ 判断(脚本模式)
if __name__ == "__main__":
    main()

# 相对导入(包内部使用)
# from .utils import parse_config
# from ..common import logger

运维工程师必知标准库速览
#

主要用途常用入口
os环境变量、进程、路径操作os.environ, os.getpid(), os.getcwd()
sys解释器参数、退出、stdin/stdoutsys.argv, sys.exit(), sys.path
subprocess执行外部命令subprocess.run(), Popen
pathlib路径操作(推荐替代 os.path)Path(...), glob(), rglob()
shutil文件/目录复制、移动、打包shutil.copy2(), copytree(), make_archive()
jsonJSON 序列化/反序列化json.load(), json.dump()
yamlYAML 解析(第三方 PyYAML)yaml.safe_load(), yaml.dump()
re正则表达式re.search(), re.findall(), re.sub()
logging结构化日志logging.getLogger(), basicConfig()
argparse命令行参数解析ArgumentParser, add_argument()
datetime日期时间处理datetime.now(), timedelta, strftime()
time时间戳、sleeptime.time(), time.sleep()
socket网络连接、端口检测socket.create_connection()
threading线程Thread, Lock, Event
concurrent.futures线程/进程池ThreadPoolExecutor, as_completed()
hashlibMD5/SHA 哈希hashlib.md5(), sha256()
base64Base64 编解码base64.b64encode(), b64decode()
gzip / tarfile压缩/解压gzip.open(), tarfile.open()
tempfile临时文件/目录tempfile.mkdtemp(), NamedTemporaryFile
functools高阶函数工具lru_cache, partial, reduce
itertools迭代器工具chain, groupby, islice
collections高级容器defaultdict, Counter, deque

高频用法示例
#

import os
import sys
import re
import hashlib
from datetime import datetime, timedelta
from collections import defaultdict, Counter

# os.environ
db_host = os.environ.get("DB_HOST", "localhost")
os.environ["APP_ENV"] = "prod"

# sys.exit
if not os.path.exists("/etc/app/config.yaml"):
    print("缺少配置文件", file=sys.stderr)
    sys.exit(1)

# re 正则
log_line = "2025-12-09 10:23:45 ERROR [web-01] connection refused (errno=111)"
pattern = r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (\w+) \[(.+?)\] (.+)"
m = re.match(pattern, log_line)
if m:
    ts, level, host, msg = m.groups()

# 提取所有 IP
text = "servers: 10.0.1.10, 10.0.1.11, and 192.168.0.1"
ips = re.findall(r"\b\d{1,3}(?:\.\d{1,3}){3}\b", text)

# datetime
now = datetime.now()
yesterday = now - timedelta(days=1)
ts_str = now.strftime("%Y-%m-%d %H:%M:%S")
parsed = datetime.strptime("2025-12-09 10:00:00", "%Y-%m-%d %H:%M:%S")

# Counter 统计日志级别分布
levels = ["INFO", "ERROR", "INFO", "WARN", "ERROR", "ERROR"]
counter = Counter(levels)
print(counter.most_common(3))   # [('ERROR', 3), ('INFO', 2), ('WARN', 1)]

# defaultdict 聚合
server_errors: dict[str, list[str]] = defaultdict(list)
events = [("web-01", "timeout"), ("web-02", "refused"), ("web-01", "500")]
for srv, err in events:
    server_errors[srv].append(err)

# hashlib 校验文件
def file_md5(path: str) -> str:
    h = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            h.update(chunk)
    return h.hexdigest()
Wenzhuo Huang
作者
Wenzhuo Huang
搞运维的工程师,写代码的运维人。专注 Kubernetes、AWS、GitOps 与基础设施可靠性。这个博客既是我的技术笔记本,也是我踩过的坑的受害者档案。

相关文章

Python 系统与文件操作实战

·1540 字·8 分钟
深入讲解 Python 系统操作,含 subprocess 进程管理、psutil 系统监控,以及一个完整的生产级日志清理脚本

Python 网络编程与 HTTP 请求

·1679 字·8 分钟
从 requests 基础到 httpx 异步,再到并发健康检查脚本,覆盖运维工程师日常 HTTP 操作场景