# NOTE(review): removed GitHub page chrome ("Skip to content", "Permalink",
# blob statistics and gutter line numbers) captured when this file was
# copied from a web view; it is not part of the module source.
"""
csv.py - read/write/investigate CSV files
"""
import re
import types
from _csv import Error, __version__, writer, reader, register_dialect, \
                 unregister_dialect, get_dialect, list_dialects, \
                 field_size_limit, \
                 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
                 __doc__
from _csv import Dialect as _Dialect

from io import StringIO
# Public API of the csv module.
__all__ = [
    "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
    "Error", "Dialect", "__doc__", "excel", "excel_tab",
    "field_size_limit", "reader", "writer",
    "register_dialect", "get_dialect", "list_dialects", "Sniffer",
    "unregister_dialect", "__version__", "DictReader", "DictWriter",
    "unix_dialect",
]
class Dialect:
    """Describe a CSV dialect.

    This must be subclassed (see csv.excel).  Valid attributes are:
    delimiter, quotechar, escapechar, doublequote, skipinitialspace,
    lineterminator, quoting.

    """
    _name = ""
    _valid = False
    # placeholders
    delimiter = None
    quotechar = None
    escapechar = None
    doublequote = None
    skipinitialspace = None
    lineterminator = None
    quoting = None

    def __init__(self):
        # The base class carries only the None placeholders above, which
        # the C-level validator rejects; only subclasses can be valid.
        if self.__class__ != Dialect:
            self._valid = True
        self._validate()

    def _validate(self):
        # Delegate attribute checking to the C-level Dialect constructor,
        # which raises TypeError for missing or ill-typed attributes.
        try:
            _Dialect(self)
        except TypeError as e:
            # We do this for compatibility with py2.3
            raise Error(str(e))
class excel(Dialect):
    """Describe the usual properties of Excel-generated CSV files."""
    # Field layout: comma-separated, double-quote quoting only when needed.
    delimiter = ','
    quotechar = '"'
    quoting = QUOTE_MINIMAL
    # Embedded quotes are escaped by doubling, not by an escapechar.
    doublequote = True
    skipinitialspace = False
    # Excel writes DOS-style line endings.
    lineterminator = '\r\n'
register_dialect("excel", excel)
class excel_tab(excel):
    """Describe the usual properties of Excel-generated TAB-delimited files."""
    # Identical to the excel dialect except for the field separator.
    delimiter = '\t'
register_dialect("excel-tab", excel_tab)
class unix_dialect(Dialect):
    """Describe the usual properties of Unix-generated CSV files."""
    # Comma-separated, but every field is quoted and lines end in bare LF.
    delimiter = ','
    quotechar = '"'
    quoting = QUOTE_ALL
    doublequote = True
    skipinitialspace = False
    lineterminator = '\n'
register_dialect("unix", unix_dialect)
class DictReader:
    """Iterate over CSV rows from *f*, yielding one dict per row.

    Keys come from *fieldnames*, or from the first row of the file when
    *fieldnames* is None.  Extra values in a long row are collected in a
    list under *restkey*; missing values in a short row default to
    *restval*.
    """

    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        # Materialize one-shot iterators so fieldnames can be reused.
        if fieldnames is not None and iter(fieldnames) is fieldnames:
            fieldnames = list(fieldnames)
        self._fieldnames = fieldnames   # list of keys for the dict
        self.restkey = restkey          # key to catch long rows
        self.restval = restval          # default value for short rows
        self.reader = reader(f, dialect, *args, **kwds)
        self.dialect = dialect
        self.line_num = 0

    def __iter__(self):
        return self

    @property
    def fieldnames(self):
        # Lazily read the header row the first time fieldnames is needed.
        if self._fieldnames is None:
            try:
                self._fieldnames = next(self.reader)
            except StopIteration:
                pass
        self.line_num = self.reader.line_num
        return self._fieldnames

    @fieldnames.setter
    def fieldnames(self, value):
        self._fieldnames = value

    def __next__(self):
        if self.line_num == 0:
            # Used only for its side effect (consuming the header row).
            self.fieldnames
        row = next(self.reader)
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = next(self.reader)
        d = dict(zip(self.fieldnames, row))
        lf = len(self.fieldnames)
        lr = len(row)
        if lf < lr:
            # Long row: stash the overflow under restkey.
            d[self.restkey] = row[lf:]
        elif lf > lr:
            # Short row: pad the missing keys with restval.
            for key in self.fieldnames[lr:]:
                d[key] = self.restval
        return d

    __class_getitem__ = classmethod(types.GenericAlias)
class DictWriter:
    """Write dicts to *f* as CSV rows, ordering values by *fieldnames*.

    Keys absent from a row dict are written as *restval*.  Keys not in
    *fieldnames* raise ValueError when *extrasaction* is "raise" (the
    default) and are silently dropped when it is "ignore".
    """

    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                 dialect="excel", *args, **kwds):
        # Materialize one-shot iterators so fieldnames can be reused.
        if fieldnames is not None and iter(fieldnames) is fieldnames:
            fieldnames = list(fieldnames)
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restval = restval          # for writing short dicts
        extrasaction = extrasaction.lower()
        if extrasaction not in ("raise", "ignore"):
            raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
                             % extrasaction)
        self.extrasaction = extrasaction
        self.writer = writer(f, dialect, *args, **kwds)

    def writeheader(self):
        """Write a header row built from the fieldnames."""
        header = dict(zip(self.fieldnames, self.fieldnames))
        return self.writerow(header)

    def _dict_to_list(self, rowdict):
        # Map a row dict onto the fieldnames order, validating extras.
        if self.extrasaction == "raise":
            wrong_fields = rowdict.keys() - self.fieldnames
            if wrong_fields:
                raise ValueError("dict contains fields not in fieldnames: "
                                 + ", ".join([repr(x) for x in wrong_fields]))
        return (rowdict.get(key, self.restval) for key in self.fieldnames)

    def writerow(self, rowdict):
        return self.writer.writerow(self._dict_to_list(rowdict))

    def writerows(self, rowdicts):
        return self.writer.writerows(map(self._dict_to_list, rowdicts))

    __class_getitem__ = classmethod(types.GenericAlias)
class Sniffer:
    '''
    "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
    Returns a Dialect object.
    '''
    def __init__(self):
        # in case there is more than one possible delimiter
        self.preferred = [',', '\t', ';', ' ', ':']


    def sniff(self, sample, delimiters=None):
        """
        Returns a dialect (or None) corresponding to the sample
        """

        # First try the quote-based heuristic; fall back to character
        # frequency analysis when it cannot find a delimiter.
        quotechar, doublequote, delimiter, skipinitialspace = \
                   self._guess_quote_and_delimiter(sample, delimiters)
        if not delimiter:
            delimiter, skipinitialspace = self._guess_delimiter(sample,
                                                                delimiters)

        if not delimiter:
            raise Error("Could not determine delimiter")

        class dialect(Dialect):
            _name = "sniffed"
            lineterminator = '\r\n'
            quoting = QUOTE_MINIMAL
            # escapechar = ''

        dialect.doublequote = doublequote
        dialect.delimiter = delimiter
        # _csv.reader won't accept a quotechar of ''
        dialect.quotechar = quotechar or '"'
        dialect.skipinitialspace = skipinitialspace

        return dialect


    def _guess_quote_and_delimiter(self, data, delimiters):
        """
        Looks for text enclosed between two identical quotes
        (the probable quotechar) which are preceded and followed
        by the same character (the probable delimiter).
        For example:
                         ,'some text',
        The quote with the most wins, same with the delimiter.
        If there is no quotechar the delimiter can't be determined
        this way.
        """

        matches = []
        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
            matches = regexp.findall(data)
            if matches:
                break

        if not matches:
            # (quotechar, doublequote, delimiter, skipinitialspace)
            return ('', False, None, 0)
        quotes = {}
        delims = {}
        spaces = 0
        # findall returns tuples of group values; groupindex maps each
        # named group to its 1-based position in those tuples.
        groupindex = regexp.groupindex
        for m in matches:
            n = groupindex['quote'] - 1
            key = m[n]
            if key:
                quotes[key] = quotes.get(key, 0) + 1
            try:
                n = groupindex['delim'] - 1
                key = m[n]
            except KeyError:
                continue
            if key and (delimiters is None or key in delimiters):
                delims[key] = delims.get(key, 0) + 1
            try:
                n = groupindex['space'] - 1
            except KeyError:
                continue
            if m[n]:
                spaces += 1

        quotechar = max(quotes, key=quotes.get)

        if delims:
            delim = max(delims, key=delims.get)
            skipinitialspace = delims[delim] == spaces
            if delim == '\n': # most likely a file with a single column
                delim = ''
        else:
            # there is *no* delimiter, it's a single column of quoted data
            delim = ''
            skipinitialspace = 0

        # if we see an extra quote between delimiters, we've got a
        # double quoted format
        dq_regexp = re.compile(
                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)



        if dq_regexp.search(data):
            doublequote = True
        else:
            doublequote = False

        return (quotechar, doublequote, delim, skipinitialspace)


    def _guess_delimiter(self, data, delimiters):
        """
        The delimiter /should/ occur the same number of times on
        each row. However, due to malformed data, it may not. We don't want
        an all or nothing approach, so we allow for small variations in this
        number.
          1) build a table of the frequency of each character on every line.
          2) build a table of frequencies of this frequency (meta-frequency?),
             e.g.  'x occurred 5 times in 10 rows, 6 times in 1000 rows,
             7 times in 2 rows'
          3) use the mode of the meta-frequency to determine the /expected/
             frequency for that character
          4) find out how often the character actually meets that goal
          5) the character that best meets its goal is the delimiter
        For performance reasons, the data is evaluated in chunks, so it can
        try and evaluate the smallest portion of the data possible, evaluating
        additional chunks as necessary.
        """

        data = list(filter(None, data.split('\n')))

        ascii = [chr(c) for c in range(127)] # 7-bit ASCII

        # build frequency tables
        chunkLength = min(10, len(data))
        iteration = 0
        charFrequency = {}
        modes = {}
        delims = {}
        start, end = 0, chunkLength
        while start < len(data):
            iteration += 1
            for line in data[start:end]:
                for char in ascii:
                    metaFrequency = charFrequency.get(char, {})
                    # must count even if frequency is 0
                    freq = line.count(char)
                    # value is the mode
                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
                    charFrequency[char] = metaFrequency

            for char in charFrequency.keys():
                items = list(charFrequency[char].items())
                if len(items) == 1 and items[0][0] == 0:
                    continue
                # get the mode of the frequencies
                if len(items) > 1:
                    modes[char] = max(items, key=lambda x: x[1])
                    # adjust the mode - subtract the sum of all
                    # other frequencies
                    items.remove(modes[char])
                    modes[char] = (modes[char][0], modes[char][1]
                                   - sum(item[1] for item in items))
                else:
                    modes[char] = items[0]

            # build a list of possible delimiters
            modeList = modes.items()
            total = float(min(chunkLength * iteration, len(data)))
            # (rows of consistent data) / (number of rows) = 100%
            consistency = 1.0
            # minimum consistency threshold
            threshold = 0.9
            while len(delims) == 0 and consistency >= threshold:
                for k, v in modeList:
                    if v[0] > 0 and v[1] > 0:
                        if ((v[1]/total) >= consistency and
                            (delimiters is None or k in delimiters)):
                            delims[k] = v
                consistency -= 0.01

            if len(delims) == 1:
                delim = list(delims.keys())[0]
                skipinitialspace = (data[0].count(delim) ==
                                    data[0].count("%c " % delim))
                return (delim, skipinitialspace)

            # analyze another chunkLength lines
            start = end
            end += chunkLength

        if not delims:
            return ('', 0)

        # if there's more than one, fall back to a 'preferred' list
        if len(delims) > 1:
            for d in self.preferred:
                if d in delims.keys():
                    skipinitialspace = (data[0].count(d) ==
                                        data[0].count("%c " % d))
                    return (d, skipinitialspace)

        # nothing else indicates a preference, pick the character that
        # dominates(?)
        items = [(v,k) for (k,v) in delims.items()]
        items.sort()
        delim = items[-1][1]

        skipinitialspace = (data[0].count(delim) ==
                            data[0].count("%c " % delim))
        return (delim, skipinitialspace)


    def has_header(self, sample):
        # Creates a dictionary of types of data in each column. If any
        # column is of a single type (say, integers), *except* for the first
        # row, then the first row is presumed to be labels. If the type
        # can't be determined, it is assumed to be a string in which case
        # the length of the string is the determining factor: if all of the
        # rows except for the first are the same length, it's a header.
        # Finally, a 'vote' is taken at the end for each column, adding or
        # subtracting from the likelihood of the first row being a header.

        rdr = reader(StringIO(sample), self.sniff(sample))

        header = next(rdr) # assume first row is header

        columns = len(header)
        columnTypes = {}
        for i in range(columns): columnTypes[i] = None

        checked = 0
        for row in rdr:
            # arbitrary number of rows to check, to keep it sane
            if checked > 20:
                break
            checked += 1

            if len(row) != columns:
                continue # skip rows that have irregular number of columns

            for col in list(columnTypes.keys()):
                thisType = complex
                try:
                    thisType(row[col])
                except (ValueError, OverflowError):
                    # fallback to length of string
                    thisType = len(row[col])

                if thisType != columnTypes[col]:
                    if columnTypes[col] is None: # add new column type
                        columnTypes[col] = thisType
                    else:
                        # type is inconsistent, remove column from
                        # consideration
                        del columnTypes[col]

        # finally, compare results against first row and "vote"
        # on whether it's a header
        hasHeader = 0
        for col, colType in columnTypes.items():
            if isinstance(colType, int): # it's a length
                if len(header[col]) != colType:
                    hasHeader += 1
                else:
                    hasHeader -= 1
            else: # attempt typecast
                try:
                    colType(header[col])
                except (ValueError, TypeError):
                    hasHeader += 1
                else:
                    hasHeader -= 1

        return hasHeader > 0