Skip to content

Commit

Permalink
Changed default representation of items to pretty-printed dicts. This…
Browse files Browse the repository at this point in the history
… improves

default logging by making the log more readable in the default case, for both Scraped and Dropped lines.

Projects can still customize how items are represented by overriding the item's __str__ method, as usual.
pablohoffman committed Jun 3, 2011
1 parent 1bc2339 commit 5bf733b
Showing 6 changed files with 17 additions and 13 deletions.
10 changes: 8 additions & 2 deletions docs/intro/tutorial.rst
Original file line number Diff line number Diff line change
@@ -405,8 +405,14 @@ scraped so far, the final code for our Spider would be like this::

Now doing a crawl on the dmoz.org domain yields ``DmozItem``'s::

[dmoz] DEBUG: Scraped DmozItem(desc=[u' - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\n'], link=[u'http://gnosis.cx/TPiP/'], title=[u'Text Processing in Python']) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
[dmoz] DEBUG: Scraped DmozItem(desc=[u' - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\n'], link=[u'http://www.informit.com/store/product.aspx?isbn=0130211192'], title=[u'XML Processing with Python']) in <http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
[dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
{'desc': [u' - By David Mertz; Addison Wesley. Book in progress, full text, ASCII format. Asks for feedback. [author website, Gnosis Software, Inc.]\n'],
'link': [u'http://gnosis.cx/TPiP/'],
'title': [u'Text Processing in Python']}
[dmoz] DEBUG: Scraped from <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
{'desc': [u' - By Sean McGrath; Prentice Hall PTR, 2000, ISBN 0130211192, has CD-ROM. Methods to build XML applications fast, Python tutorial, DOM and SAX, new Pyxie open source XML processing library. [Prentice Hall PTR]\n'],
'link': [u'http://www.informit.com/store/product.aspx?isbn=0130211192'],
'title': [u'XML Processing with Python']}

Storing the scraped data
========================
7 changes: 2 additions & 5 deletions scrapy/item.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
See documentation in docs/topics/item.rst
"""

from pprint import pformat
from UserDict import DictMixin

from scrapy.utils.trackref import object_ref
@@ -72,11 +73,7 @@ def keys(self):
return self._values.keys()

def __repr__(self):
"""Generate a representation of this item that can be used to
reconstruct the item by evaluating it
"""
values = ', '.join('%s=%r' % field for field in self.iteritems())
return "%s(%s)" % (self.__class__.__name__, values)
return pformat(dict(self))


class Item(DictItem):
5 changes: 3 additions & 2 deletions scrapy/logformatter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os

class LogFormatter(object):
"""Class for generating log messages for different actions. All methods
@@ -12,7 +13,7 @@ def crawled(self, request, response, spider):
request, referer, flags)

def scraped(self, item, response, spider):
return "Scraped %s in <%s>" % (item, response.url)
return "Scraped from %s" % response + os.linesep + str(item)

def dropped(self, item, exception, response, spider):
return "Dropped %s - %s" % (item, unicode(exception))
return "Dropped: %s" % unicode(exception) + os.linesep + str(item)
2 changes: 1 addition & 1 deletion scrapy/tests/test_command_shell.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@defer.inlineCallbacks
def test_empty(self):
_, out, _ = yield self.execute(['-c', 'item'])
assert 'Item' in out
assert '{}' in out

@defer.inlineCallbacks
def test_response_body(self):
2 changes: 1 addition & 1 deletion scrapy/tests/test_item.py
Original file line number Diff line number Diff line change
@@ -50,7 +50,7 @@ class TestItem(Item):
i['number'] = 123
itemrepr = repr(i)
self.assertEqual(itemrepr,
"TestItem(name=u'John Doe', number=123)")
"{'name': u'John Doe', 'number': 123}")

i2 = eval(itemrepr)
self.assertEqual(i2['name'], 'John Doe')
4 changes: 2 additions & 2 deletions scrapy/tests/test_logformatter.py
Original file line number Diff line number Diff line change
@@ -26,8 +26,8 @@ def test_dropped(self):
item = {}
exception = Exception(u"\u2018")
response = Response("http://www.example.com")
self.assertEqual(self.formatter.dropped(item, exception, response, self.spider),
u"Dropped {} - \u2018")
lines = self.formatter.dropped(item, exception, response, self.spider).splitlines()
self.assertEqual(lines, [u"Dropped: \u2018", '{}'])

if __name__ == "__main__":
unittest.main()

0 comments on commit 5bf733b

Please sign in to comment.