I can run a crawl from a Python script with the following recipe from the wiki:
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy
You need to modify your __init__() constructor to accept the date argument. Also, I would use datetime.strptime() to parse the date string:
from datetime import datetime
class MySpider(CrawlSpider):
    """Spider whose single start URL is derived from a ``date`` keyword.

    The ``date`` keyword argument is required and must be a string in
    ``MM-DD-YYYY`` form (parsed with the ``"%m-%d-%Y"`` format).
    """

    name = 'tw'
    allowed_domains = ['test.com']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and build ``start_urls`` from ``date``.

        All positional and keyword arguments are forwarded to the parent
        initialiser unchanged.

        Raises:
            ValueError: if ``date`` is missing or empty, or if it does not
                match the ``"%m-%d-%Y"`` format.
        """
        # The parent initialiser runs first, before the date is validated.
        super(MySpider, self).__init__(*args, **kwargs)

        raw_date = kwargs.get('date')
        if not raw_date:
            raise ValueError('No date given')

        parsed = datetime.strptime(raw_date, "%m-%d-%Y")
        # Month and day are rendered as plain integers (no zero padding).
        url_template = 'http://test.com/{dt.year}-{dt.month}-{dt.day}'
        self.start_urls = [url_template.format(dt=parsed)]
Then, you would instantiate the spider this way:
# The date string must be in MM-DD-YYYY form to match the "%m-%d-%Y"
# format that MySpider.__init__ parses it with.
spider = MySpider(date='01-01-2015')
Or, you can even avoid parsing the date at all, passing a datetime instance in the first place:
class MySpider(CrawlSpider):
    """Spider whose single start URL is derived from a ``dt`` keyword.

    The ``dt`` keyword argument is required; its ``year``, ``month`` and
    ``day`` attributes are interpolated into the start URL.
    """

    name = 'tw'
    allowed_domains = ['test.com']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and build ``start_urls`` from ``dt``.

        All positional and keyword arguments are forwarded to the parent
        initialiser unchanged.

        Raises:
            ValueError: if ``dt`` is missing or falsy.
        """
        # The parent initialiser runs first, before ``dt`` is validated.
        super(MySpider, self).__init__(*args, **kwargs)

        when = kwargs.get('dt')
        if not when:
            raise ValueError('No date given')

        # Month and day are rendered as plain integers (no zero padding).
        url_template = 'http://test.com/{dt.year}-{dt.month}-{dt.day}'
        self.start_urls = [url_template.format(dt=when)]
# Use plain decimal literals: a leading zero (e.g. ``01``) is a SyntaxError
# on Python 3 and an octal literal on Python 2.
spider = MySpider(dt=datetime(year=2014, month=1, day=1))
And, just FYI, see this answer for a detailed example of how to run Scrapy from a script.