28 lines
909 B
Python
28 lines
909 B
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
|
|
import os
|
|
from pymongo import MongoClient
|
|
|
|
# MongoDB connection, configured through CRAWLAB_* environment variables.
# An unset or empty variable falls back to the literal default beside it;
# username and password have no fallback and are passed through as read.
mongo = MongoClient(
    host=os.getenv('CRAWLAB_MONGO_HOST') or 'localhost',
    port=int(os.getenv('CRAWLAB_MONGO_PORT') or 27017),
    username=os.getenv('CRAWLAB_MONGO_USERNAME'),
    password=os.getenv('CRAWLAB_MONGO_PASSWORD'),
    authSource=os.getenv('CRAWLAB_MONGO_AUTHSOURCE') or 'admin',
)

# Database and collection that items are written to (both default to 'test').
db = mongo[os.getenv('CRAWLAB_MONGO_DB') or 'test']
col = db[os.getenv('CRAWLAB_COLLECTION') or 'test']

# Identifier stamped onto every stored item; None when the variable is unset.
task_id = os.getenv('CRAWLAB_TASK_ID')
|
|
|
|
class ConfigSpiderPipeline(object):
    """Item pipeline that tags each item with the task id and stores it."""

    def process_item(self, item, spider):
        """Stamp ``item`` with the module-level ``task_id`` and persist it.

        Args:
            item: Mapping-like scraped item; it is modified in place.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` object, now carrying a ``task_id`` key.
        """
        item['task_id'] = task_id

        if col is None:
            return item

        # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0.
        # Reproduce its semantics with the supported calls: replace (or
        # create) the document when the item already has an ``_id``,
        # otherwise insert it as a new document.
        if '_id' in item:
            col.replace_one({'_id': item['_id']}, item, upsert=True)
        else:
            col.insert_one(item)

        return item
|