Python最佳编码实践
译自: http://www.fantascienza.net/leonardo/ar/python_best_practices.html
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
有时候,甚至优秀的程序员在他们首次尝试用Python时,会很少使用最佳方案和数据结构。在过去数年中,Python也逐渐累积了一些诸如:冗余,缺点等瑕疵(它们中的一部分会从Python3.0中移除,这篇文章主要基于Python 2.5,不针对之后的版本,比如说Python 2.6,同样不针对Jython 或 PyPy 或 IronPython 它们之间有点微妙的差异),但是总而言之,Python依然是非常干净、简洁的语言,本文也像这样,目的是为了让你避免程序开发实践中最可能碰到的误区。本文同样简短,你甚至能发现在线解释。
当然啦,我也许可能是错误的,但该页(文)来自一些实践,因此当你不同意我观点的时候,我建议你通过邮件组或者网页去查找答案,而不是想当然地自以为是。如果你的确发现我有错误,或者有任何建议、看法,请通过邮件联系我们,我们会非常高兴,并改正错误。
差的代码格式
|
好的代码格式
|
x=5 if ( (x==8) and (y>5) ) : ... 1<<5&2 return(5); while (x<5) : ... 7. |
x = 5 if x == 8 and y > 5: ... (1 << 5) & 2 return 5 while x < 5: ... 7.0 |
print x,x*x+1 v[i + 1 + a] + v[i + 2 + b] |
# 有时候规则可以打破,比如为了突出运算的分组时,可以像下面这样 print x, x*x + 1 v[i+a+1] + v[i+b+2] |
def Function ( x ): ... | def function(x): ... |
class fooclass: ... | class Fooclass(object): ... |
d = dict() | freqs = {} # 变量最好用描述性名称 # 小范围来说,短的变量名也是不错的选择。 |
list = [1, 2, 3] dict = {'alpha': 0x1234} sum = x + y |
# 不要用内置名称(如list、dict、sum)来做变量名 values = [1, 2, 3] symbol_address = {'alpha': 0x1234} tot = x + y |
"some string" and 'some string' and """some string""" and '''some string''' # 上述的格式是一样,一样,一样的 |
|
mapping = { 5 :"5", 6:"6" } | mapping = {5: "5", 6: "6"} |
mapping = {5 : "5", 6 : "6"} if mapping.has_key(6): ... |
mapping = {5: "5", 6: "6"} if 6 in mapping: ... |
def function( x, l = [] ): ... | # 通常不用可变对象,比如列表来做默认值 def function(x, items=None): ... if items is None: items = [] |
if x == None: ... | if x is None: ... |
x = 1 if z > 5: var1 = 55 |
# 总是用4个空格作为缩进 # (或者总是用TAB键作为缩进,但这样同样不好) x = 1 if z > 5: var1 = 55 |
mapping = {5 : "5", 6 : "6"} for key, val in mapping.items(): ... for key in mapping.keys(): ... |
# 尽可能使用iter*开头的迭代方法 mapping = {5: "5", 6: "6"} for key, val in mapping.iteritems(): ... for key in mapping: ... |
for i in range(10, 20000): ... | for i in xrange(10, 20000): ... |
# Use to denote the code that has to # run when a module is executed and not # imported: if __name__ == '__main__': |
|
# Python profiler: python -m profile -o stats myscript.py >>> import pstats >>> p = pstats.Stats('stats') >>> p.sort_stats('time').print_stats(15) |
|
对于含非ASCII编码的字符,在首行添加: # -*- coding: UTF-8 -*- # 或者如果你电脑内存不足,就干脆使用latin编码 # coding: latin |
|
al = [1, 2, 3] for i in xrange(len(al)-1, -1, -1): del al[i] |
items = [1, 2, 3] del items[:] # 如果程序速度不是最重要的话,可以仅仅新增加一个空列表: items = [] # 如果你只想移除一个列表的引用值时,可以使用: del items |
repeat xxx until yyy |
# 等价于: while True: xxx if yyy: break |
# 增加一个包含所需模块压缩文件到搜索路径中 sys.path.append("some.zip") |
|
a = 5 b = 6 aux = a a = b b = aux |
a = 5 b = 6 a, b = b, a # 两个变量交换值,无需中间变量 |
if x < 10 and x > 2: ... | if 2 < x < 10: ... |
a = 5 b = 5 c = 5 |
a = b = c = 5 |
if x == 1: y = fun1(x) else if x == 2: y = fun2(x) else if x == 3: y = fun3(x) else: y = None |
if x == 1: y = fun1(x) elif x == 2: y = fun2(x) elif x == 3: y = fun3(x) else: y = None # 但有时候,使用字典无疑更好些: funs = {1: fun1, 2: fun2, 3: fun3} y = funs.get(x, lambda x:None)(x) |
mapping = {5 : "5", 6 : "6"} for key in mapping.iterkeys(): ... |
mapping = {5: "5", 6: "6"} for key in mapping: ... |
al = [1, 2, 3] for i in xrange(len(al)): print al[i] |
al = [1, 2, 3] for el in al: print el |
al = [1, 2, 3] for i in xrange(len(al)-1, -1, -1): print al[i] |
al = [1, 2, 3] for el in reversed(al): print el |
class Test(object): def __init__(I, x): ... |
class Test(object): def __init__(self, x): ... |
# Compute the sum of the ... def sum_of(x, y, z): ... |
def sum_of(x, y, z): ... """Compute the sum of the ...""" |
from operator import add sl = ["ab", "cd", "ef"] all = "" for s in sl: all += s # Or: sl = ["ab", "cd", "ef"] all = reduce(lambda x,y: x+y, sl, "") |
sl = ["ab", "cd", "ef"] all = "".join(sl) |
a = "this isn't a word, right?" a = a.replace("'", " ") a = a.replace(".", " ") a = a.replace("?", " ") a = a.replace(",", "") |
# 用.replace也可以,但下面的方式更快: from string import maketrans tab = maketrans("'.?", "   ") a = "this isn't a word, right." afilt = a.translate(tab, ",") |
values = ["stop",0,0] | values = ["stop", 0, 0] |
def mul(x, y): return x*y l = [2, 3] print apply(mul, l) |
def mul(x, y): return x * y l = [2, 3] print mul(*l) |
vals = [2, 3, -5, 0] result = [] for el in vals: if el > 0: result.append(el * el) |
vals = [2, 3, -5, 0] result = [el * el for el in vals if el > 0] |
l = [0] * 4 m = [l] * 4 m[1][1] = 5 print m |
# 一个正确的做法是创建一个矩阵: m = [[0] * 4 for _ in xrange(4)] m[1][1] = 5 print m |
a = 1 print a / 2, a / float(2) |
# 一种可替代方案: from __future__ import division a = 1 print a // 2, a / 2 |
class Foo(object): def __init__(self, x, y, z): self.x_public = x self.y_private = y self.z_veryprivate = z def getx(self): return self.x_public print Foo(1, 2, 3).getx() |
# Generally getters and setters are not used. |
finder = re.compile("^\s*([\[\]])\s*([-+]?\d+) \s*,\s*([-+]?\d+)\s*([\[\]])\s*$") |
finder = re.compile(r""" ^ \s* # start at beginning+ opt spaces ( [\[\]] ) # Group 1: opening bracket \s* # optional spaces ( [-+]? \d+ ) # Group 2: first number \s* , \s* # opt spaces+ comma+ opt spaces ( [-+]? \d+ ) # Group 3: second number \s* # opt spaces ( [\[\]] ) # Group 4: closing bracket \s* $ # opt spaces+ end at the end """, flags=re.VERBOSE) # 上面的关于正则表达式代码就很容易读懂. # 每行就像代码一样. # 下面是另外一个不错的关于正则表达式排版方式: spaces = r"\s*" # optional spaces number = r"( [-+]? \d+ )" # Group bracket = r"( [\[\]] )" # Group. Closing bracket parts = ["^", bracket, number, ",", number, bracket, "$"] finder = re.compile(spaces.join(parts), flags=re.VERBOSE) |
def function(data): """A comment""" ...implementation... |
# 使用doctests模块(或者tests模块): def function(data): """A comment >>> function() if __name__ == "__main__": |
x = (1, 2, 6, 55, 63, 96, 125, 256, \ 301, 456, 958, 1256, \ 1359, 2568, 3597) |
x = (1, 2, 6, 55, 63, 96, 125, 256, 301, 456, 958, 1256, 1359, 2568, 3597) # 太多的行,必须用\来作为分割符 # 但\在() [] {}中是不必要的 |
from Tkinter import * from mymodule import * |
import Tkinter as tk from mymodule import fun1, Class1, baseconvert as bc |
import psyco psyco.bind(myfun1) a = [3.56, 2.12] |
try: import psyco # psyco中的类非常有用 from psyco.classes import __metaclass__ psyco.bind(myfun1) except ImportError: pass # 使用psyco时,array.array中的 # 双精度值可以更快些 import array a = array.array("d", [3.56, 2.12]) # 在一些情形下,使用字符数组同样很快 # psyco同itertools,map,filter,生成器一起使用可能会很慢 # 但会比列表解析快 # 使用低级别的编码风格时,Psyco速度最快 |
# 打印一个不包含空格的字符串: from sys import stdout stdout.write(string1) stdout.write(string2) |
|
This is good enough: words = ['me', 'do', 'bye', 'taz', 'foo', 'bar'] A shorter, more readable, but slower alternative: words = 'me do bye taz foo bar'.split() |
|
# sorting on the second item of the tuple # try to remove the i index from the temporary tuples lp = [(5J,"b"),(2J,"c"),(3+1J,"a"),(1+2J,"a")] lp2 = [(c, i, n) for i,(n, c) in enumerate(lp)] lp2.sort() print [(n, c) for (c, i, n) in lp2] |
from operator import itemgetter lp = [(5J, "b"), (2J, "c"), (3+1J, "a"), (1+2J, "a")] print sorted(lp, key=itemgetter(1)) |
vals = [5, 7 ,8] tot = -2.0 for v in vals: tot += v |
vals = [5, 7 ,8] tot = sum(vals, -2.0) |
ll = [[1, 2, 3], [4], [5, 6]] print sum(ll, []) |
data = [[1, 2, 3], [4], [5, 6]] result = [] for sublist in data: result.extend(sublist) # 最快速度实现 from itertools import imap data = [[1, 2, 3], [4], [5, 6]] result = [None] * sum(imap(len, data)) pos = 0 for sublist in data: lensl = len(sublist) result[pos : pos+lensl] = sublist pos += lensl |
print "%s %s" % (string1, string2) print '"' + chr(c) + '":', freq[c] |
print string1, string2 print '"%c": %d' % (c, freq[c]) |
[' ', c][c.isalpha()] | # Python 2.5以上版本支持: (c if c.isalpha() else ' ') |
# 如何反转一个字符串,列表等. alist[::-1] astring[::-1] |
|
# To negate (inplace) each second # element of alist: result = [] for (i, v) in enumerate(alist): # faster than i % 2 if i & 1 == 0: result.append(v) else: result.append(-v) alist[:] = result |
from operator import neg alist[1::2] = map(neg, alist[1::2]) # 下面方式虽慢点,但是易读: alist[1::2] = [-el for el in alist[1::2]] |
# 浅拷贝一个字典或列表: # (元组无需拷贝) newlist = list(alist) newdict = dict(adict) # 或者仅仅是: newlist = alist[:] |
|
import sys sys.exit() |
# 停止一个控制台程序: raise SystemExit #或者仅仅是: exit() |
if type(s) == type(""): ... if type(seq) == list or \ type(seq) == tuple: ... |
if isinstance(s, basestring): ... if isinstance(seq, (list, tuple)): ... # Or even: if hasattr(seq, "__getitem__"): ... # But quite often in dynamic languages you # don't test types, you just use them (look # for duck typing), catching exception that # may occur. |
name1 = 5; name2 = 20; print name2 a = 1 b = 2 c = 3 |
name1 = 5 name2 = 20 print name2 a, b, c = 1, 2, 3 |
prima = 1 rossa = "Il colore rosso" léger = 30 |
# 只能用英文字母做变量名: first = 1 red = "Il colore rosso" light = 30 |
__del__ method of classes is usually left undefined. |
|
try: fin = file("absent_file.txt") except: ... try: something() except: ... |
# 通常应指明要捕获的异常类型: try: fin = file("absent_file.txt") except IOError: ... try: something() except someException: ... |
except ImportError, IOError: ... | except (ImportError, IOError): ... |
bytes = array.array('B', [0] * nbytes) # 或者: from itertools import repeat bytes = array.array('B', repeat(0, nbytes)) |
# 下面的方式更快些 bytes = array.array('B', [0]) * nbytes |
freqs = {} for c in "abracadabra": try: freqs[c] += 1 except: freqs[c] = 1 |
# 简单方式: freqs = {} for c in "abracadabra": freqs[c] = freqs.get(c, 0) + 1 # 常用方式: freqs = {} for c in "abracadabra": if c in freqs: freqs[c] += 1 else: freqs[c] = 1 # 或者在Python 2.5以上版本使用: from collections import defaultdict freqs = defaultdict(int) for c in "abracadabra": freqs[c] += 1 |
someitems = set([1, 2, 3]) |
someitems = set([1, 2, 3]) somemap = {1: 2, 3: 4, 5: 6} print iter(someitems).next() print iter(somemap).next() |
from time import clock | # 下面的导入方式在Windows和LINUX下都工作正常: from timeit import default_timer as clock # 或者经常使用timeit模块来计算程序运行时间 |
请同时参阅Python代码规范: http://www.python.org/dev/peps/pep-0008/