58zhaopin/config_spider/pipelines.py
2020-04-27 05:29:41 +08:00

28 lines
909 B
Python

# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item