# generatefeed.py
# Command-line script to generate an RSS feed from a set of well-formatted
# JSON items.
# This script tries to be as generic as possible. The items used to generate
# the feed must be JSON-formatted (because JSON is simple to read and write),
# and their keys must follow the names of the item elements described
# in the RSS 2.0 specification:
#   http://cyber.law.harvard.edu/rss/rss.html#hrelementsOfLtitemgt


import argparse
import collections
import datetime
import json
import os
import PyRSS2Gen as rss

def parseArguments():
  """
  Parse the command-line options of the feed generator.

  Returns the argparse namespace with the attributes ``output``,
  ``status_item_path``, ``max_item`` (50 by default), ``feed_title``,
  ``feed_link`` and ``feed_description``. When --description is not given,
  the description falls back to the feed title.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--output', dest='output', type=str, required=True,
                      help='Path where to save the file')
  parser.add_argument('--status-item-path', dest='status_item_path',
                      type=str, required=True,
                      help='Path where to find feed items')
  parser.add_argument('--max-item', dest='max_item', type=int,
                      default=50, required=False,
                      help='Maximum number of items in the feed')

  parser.add_argument('--title', dest='feed_title', type=str, required=True,
                      help='Title of the feed')
  parser.add_argument('--link', dest='feed_link', type=str, required=True,
                      help='Link of the feed')
  parser.add_argument('--description', dest='feed_description',
                      type=str, required=False,
                      help='Description of the feed')

  option = parser.parse_args()
  # argparse always creates the attribute (set to None when the flag is
  # absent), so the fallback must test the value, not the attribute existence.
  if option.feed_description is None:
    option.feed_description = option.feed_title

  return option


def deleteFileList(file_list):
  """
  Remove every path of file_list from the filesystem.

  Deletion is best effort: a path that cannot be removed (OSError) is
  skipped silently and the remaining paths are still processed.
  """
  for path in file_list:
    try:
      os.remove(path)
    except OSError:
      # Nothing useful can be done about it, go on with the next path
      continue


def getRSSItemListFromItemDict(item_dict):
  """
  Build the list of PyRSS2Gen items matching the given feed items.

  item_dict maps a key to a dict of RSSItem keyword arguments, whose
  'pubDate' entry is a timestamp in seconds since the epoch. The items are
  returned in the iteration order of item_dict, and the dicts held by
  item_dict are left untouched.
  """
  rss_item_list = []

  for item in item_dict.values():
    # Work on a copy: the caller's data keeps its numeric pubDate, so this
    # function can safely be called again on the same dict.
    rss_kw = dict(item)
    # PyRSS2Gen writes datetimes as-is with a "GMT" suffix, so the timestamp
    # has to be converted to UTC rather than to local time.
    rss_kw['pubDate'] = datetime.datetime.utcfromtimestamp(item['pubDate'])
    rss_item_list.append(rss.RSSItem(**rss_kw))

  return rss_item_list


def generateFeed(option):
  """
  Build the RSS feed from the JSON item files and return it as XML.

  Every regular file of option.status_item_path is read as one JSON feed
  item; files whose content is not valid JSON are deleted. Only the
  option.max_item most recent items (according to their 'pubDate') are kept
  in the feed, and the files of the older ones are deleted.

  Returns the XML document produced by PyRSS2Gen.
  """
  item_dict = {} # {file: content}

  for filename in os.listdir(option.status_item_path):
    file_path = os.path.join(option.status_item_path, filename)
    if not os.path.isfile(file_path):
      # Sub-directories and other non-regular entries cannot hold an item
      continue
    try:
      with open(file_path, 'r') as fd:
        item_dict[file_path] = json.load(fd)
    except ValueError:
      # JSON couldn't be decoded, let's trash it as
      # no useful information can be extracted.
      # The file is already closed here, which keeps the removal portable.
      os.unlink(file_path)

  # Oldest item first
  sorted_item_list = sorted(item_dict.items(), key=lambda x: x[1]['pubDate'])

  # Reduces feed if number of items exceeds max_item
  excess_count = len(sorted_item_list) - option.max_item
  if excess_count > 0:
    # Slice from the front by count: a [:-max_item] slice would select
    # nothing when max_item is 0.
    deleteFileList([path for path, _ in sorted_item_list[:excess_count]])
    sorted_item_list = sorted_item_list[excess_count:]

  # Generate feed
  feed = rss.RSS2(
    title=option.feed_title,
    link=option.feed_link,
    description=option.feed_description,
    # PyRSS2Gen labels every date as GMT, hence UTC and not local time
    lastBuildDate=datetime.datetime.utcnow(),
    items=getRSSItemListFromItemDict(
        collections.OrderedDict(sorted_item_list))
  )

  return feed.to_xml()


def main():
  """
  Command-line entry point: parse the options, generate the feed and write
  the resulting XML to the path given by --output.
  """
  option = parseArguments()
  feed = generateFeed(option)
  # Context manager so that the output is flushed and closed deterministically
  with open(option.output, 'w') as output_file:
    output_file.write(feed)


# Only generate the feed when executed as a script, not when imported
if __name__ == "__main__":
  main()