#! /usr/bin/env python
# -*- coding: koi8-r -*-
"""Scan the blog templates and build the blog index.

Usage: reindex_blog.py BLOG_DATA_ROOT BLOG_ROOT

BLOG_DATA_ROOT is the directory that holds blog_dict.pickle, the index that
is loaded here as ``old_blog``.  BLOG_ROOT is the directory tree that is
searched for *.tmpl Cheetah templates.
"""

__version__ = "$Revision: 74 $"[11:-2]
__revision__ = "$Id: reindex_blog.py 74 2007-12-05 21:05:10Z phd $"[5:-2]
__date__ = "$Date: 2007-12-06 00:05:10 +0300 (Thu, 06 Dec 2007) $"[7:-2]
# NOTE(review): the trailing space suggests that an e-mail address in angle
# brackets was lost from this string - confirm against version control.
__author__ = "Oleg Broytman "
__copyright__ = "Copyright (C) 2006 PhiloSoft Design"


import sys, os

blog_data_root = sys.argv[1]
blog_root = sys.argv[2]
blog_filename = os.path.join(blog_data_root, "blog_dict.pickle")

try:
    import cPickle as pickle
except ImportError:
    import pickle

from Cheetah.Template import Template


# Load old blog.  A missing or unreadable file simply means "no previous
# index".  The pickle is this script's own cache; never point blog_data_root
# at data from an untrusted source.
try:
    blog_file = open(blog_filename, "rb")
except IOError:
    old_blog = {}
else:
    # Close the file even when unpickling fails.
    try:
        old_blog = pickle.load(blog_file)
    finally:
        blog_file.close()


# blog is a dictionary mapping
# (year, month, day) => [list of (file, title, lead, tags)]
blog = {}
# years is a dictionary mapping year => {month => [list of days]}
years = {}

# Walk the directory recursively
for dirpath, dirs, files in os.walk(blog_root):
    d = os.path.basename(dirpath)
    # Only directories whose name starts with "20" or consists of digits
    # are scanned for templates.
    if not d.startswith("20") and not d.isdigit():
        continue

    for filename in files:
        if not filename.endswith(".tmpl"):
            continue

        fullpath = os.path.join(dirpath, filename)
        template = Template(file=fullpath)

        # The entry title is whatever follows the first six
        # whitespace-separated words of the template's Title.
        # NOTE(review): presumably those six words are the site name and the
        # date, which would leave generated index pages with an empty title
        # so that they are skipped below - confirm.
        title_parts = template.Title.split()
        title = ' '.join(title_parts[6:])
        lead = getattr(template, "Lead", None)

        # A single tag given as a string is normalised to a 1-tuple.
        tags = template.Tag
        if isinstance(tags, basestring):
            tags = (tags,)

        if not title:
            continue

        # The path components below blog_root are taken as year/month/day.
        key = year, month, day = \
            tuple(dirpath[len(blog_root):].split(os.sep)[1:])
        blog.setdefault(key, []).append((filename, title, lead, tags))

        month_days = years.setdefault(year, {}).setdefault(month, [])
        if day not in month_days:
            month_days.append(day)

# Need to save the blog?
# NOTE(review): from here on the file appears to have lost its original line
# breaks, and the markup inside its string literals looks stripped: several
# format strings have fewer placeholders than the tuples applied to them, and
# the per-tag page code opens a list with `[` but closes it with `)`.
# Restore this part from version control before changing any code.
#
# What the remaining code does, as far as it can be read:
#   * re-pickles `blog` into blog_filename when it differs from `old_blog`;
#   * builds English and Russian month-name tables;
#   * write_template(level, year, month, day, titles, tags=None) assembles the
#     text of an index.tmpl for the blog root (level 0), a year (1), a month
#     (2) or a day (3) and hands it to write_if_changed(); at level 0 it reads
#     the module-level `all_tags`, not its `tags` parameter;
#   * the loops after it call write_template for every day, month and year,
#     collect tag => entries in `all_tags`, then write tags/index.tmpl and
#     per-tag templates under tags/;
#   * finally the last ten entries of all_titles_tags, in reverse order,
#     become NewsItem objects that are rendered through atom_10 and rss_20
#     into atom_10.xml and rss_20.xml.
if blog <> old_blog: blog_file = open(blog_filename, "wb") pickle.dump(blog, blog_file, pickle.HIGHEST_PROTOCOL) blog_file.close() # Localized month names import locale locale.setlocale(locale.LC_ALL, "ru_RU.KOI8-R") from calendar import _localized_day, _localized_month locale.setlocale(locale.LC_TIME, 'C') months_names_en = list(_localized_month('%B')) months_abbrs_en = list(_localized_month('%b')) locale.setlocale(locale.LC_TIME, "ru_RU.KOI8-R") #months_names_ru = list(_localized_month('%B')) months_names_ru = ['', "января", "февраля", "марта", "апреля", "мая", "июня", "июля", "августа", "сентября", "октября", "ноября", "декабря" ] months_names_ru0 = ['', "январь", "февраль", "март", "апрель", "май", "июнь", "июль", "август", "сентябрь", "октябрь", "ноябрь", "декабрь" ] from news import write_if_changed def write_template(level, year, month, day, titles, tags=None): path = [blog_root] if level >= 1: path.append(year) if level >= 2: path.append(month) if level == 3: path.append(day) path.append("index.tmpl") index_name = os.path.join(*path) new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. #extends phd_pp_ru #implements respond """] if level == 0: new_text.append("""\ #attr $Title = "Oleg Broytman's blog" #attr $Description = "Broytman Russian Blog Index Document" #attr $Copyright = %(cyear)s #attr $alternates = (("application/atom+xml", "News [Atom 1.0]", "atom_10.xml"), ("application/rss+xml", "News [RSS 2.0]", "rss_20.xml") ) ## #def body_html

Журнал

""" % {"cyear": year or 2005}) elif level == 1: new_text.append("""\ #attr $Title = "Oleg Broytman's blog: %(year)s" #attr $Description = "Broytman Russian Blog %(year)s Index Document" #attr $Copyright = %(cyear)s ## #def body_html

Журнал: %(year)s

""" % {"year": year, "cyear": year or 2005}) elif level == 2: imonth = int(month) new_text.append("""\ #attr $Title = "Oleg Broytman's blog: %(month_abbr_en)s %(year)s" #attr $Description = "Broytman Russian Blog %(month_name_en)s %(year)s Index Document" #attr $Copyright = %(cyear)s ## #def body_html

Журнал: %(month_name_ru0)s %(year)s

""" % { "year": year, "cyear": year or 2005, "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], "month_name_ru0": months_names_ru0[imonth], }) elif level == 3: iday = int(day) imonth = int(month) new_text.append("""\ #attr $Next = "%s" """ % titles[0][3]) if len(titles) == 1: new_text.append("""\ #attr $refresh = "0; URL=%s" """ % titles[0][3]) new_text.append("""\ #attr $Title = "Oleg Broytman's blog: %(day)d %(month_abbr_en)s %(year)s" #attr $Description = "Broytman Russian Blog %(day)d %(month_name_en)s %(year)s Index Document" #attr $Copyright = %(cyear)s ## #def body_html

Журнал: %(day)d %(month_name_ru)s %(year)s

""" % { "year": year, "cyear": year or 2005, "month_abbr_en": months_abbrs_en[imonth], "month_name_en": months_names_en[imonth], "month_name_ru": months_names_ru[imonth], "day": iday }) save_titles = titles[:] titles.reverse() save_date = None for year, month, day, file, title, lead in titles: href = [] if level == 0: href.append(year) if level <= 1: href.append(month) if level <= 2: href.append(day) href.append(file) href = '/'.join(href) if day[0] == '0': day = day[1:] if save_date <> (year, month, day): if level == 0: new_text.append('\n

%s %s %s

' % (day, months_names_ru[int(month)], year)) else: new_text.append('\n

%s %s

' % (day, months_names_ru[int(month)])) save_date = year, month, day if lead: lead = lead + ' ' else: lead = '' new_text.append('''

%s%s.

''' % (lead, href, title)) if level == 0: new_text.append("""

Новостевая лента в форматах Atom 1.0 и RSS 2.0 .

""") years = {} for year, month, day, file, title, lead in save_titles: years[year] = True new_text.append('''

Теги: ''') first_tag = True for count, tag, links in all_tags: if first_tag: first_tag = False else: new_text.append(' - ') new_text.append("""%s (%d)""" % (tag, tag, count)) new_text.append('''

''') max_year = int(sorted(years.keys())[-1]) years = range(2005, max_year+1) new_text.append('''

По годам: ''') first_year = True for year in years: if first_year: first_year = False else: new_text.append(' - ') new_text.append('%s' % (year, year)) new_text.append('''

''') new_text.append("""

ЖЖ """) new_text.append("""\ #end def $phd_pp_ru.respond(self) """) write_if_changed(index_name, ''.join(new_text)) all_tags = {} all_titles = [] all_titles_tags = [] for year in sorted(years.keys()): year_titles = [] months = years[year] for month in sorted(months.keys()): month_titles = [] for day in sorted(months[month]): day_titles = [] key = year, month, day if key in blog: for file, title, lead, tags in blog[key]: if file.endswith(".tmpl"): file = file[:-len("tmpl")] + "html" value = (year, month, day, file, title, lead) all_titles_tags.append((year, month, day, file, title, lead, tags)) all_titles.append(value) year_titles.append(value) month_titles.append(value) day_titles.append(value) for tag in tags: if tag in all_tags: tag_links = all_tags[tag] else: tag_links = all_tags[tag] = [] tag_links.append(value) write_template(3, year, month, day, day_titles) write_template(2, year, month, day, month_titles) write_template(1, year, month, day, year_titles) def by_count_rev_tag_link(t1, t2): """Sort all_tags by count in descending order, and by tags and links in ascending order """ r = cmp(t1[0], t2[0]) if r: return -r return cmp((t1[1], t1[2]), (t2[1], t2[2])) all_tags = [(len(links), tag, links) for (tag, links) in all_tags.items()] all_tags.sort(by_count_rev_tag_link) write_template(0, year, month, day, all_titles[-20:], all_tags) new_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. #extends phd_pp_ru #implements respond #attr $Title = "Oleg Broytman's blog: tags" #attr $Description = "Broytman Russian Blog Tags Index Document" #attr $Copyright = 2006 ## #def body_html

Теги

"""] for count, tag, links in all_tags: new_text.append("""\
%s (%d)
""" % (tag, tag, count)) tag_text = ["""\ ## THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT. #extends phd_pp_ru #implements respond #attr $Title = "Oleg Broytman's blog: tag %s" #attr $Description = "Broytman Russian Blog Tag %s Index Document" #attr $Copyright = 2006 ## #def body_html

%s

#end def $phd_pp_ru.respond(self) """) write_if_changed(os.path.join(blog_root, "tags", tag+".tmpl"), ''.join(tag_text)) new_text.append("""\

#end def $phd_pp_ru.respond(self) """) write_if_changed(os.path.join(blog_root, "tags", "index.tmpl"), ''.join(new_text)) from atom_10 import atom_10 from rss_20 import rss_20 from news import NewsItem if blog_root: baseURL = "http://phd.pp.ru/%s/" % blog_root else: baseURL = "http://phd.pp.ru/" items = [] for item in tuple(reversed(all_titles_tags))[:10]: year, month, day, file, title, lead, tags = item if lead: lead = lead + ' ' else: lead = '' item = NewsItem( "%s-%s-%s" % (year, month, day), "%s%s" % (lead, title), "%s/%s/%s/%s" % (year, month, day, file) ) items.append(item) item.baseURL = baseURL item.categoryList = tags namespace = { "title": "Oleg Broytman's blog", "baseURL": baseURL, "indexFile": "", "description": "", "lang": "ru", "author": "Oleg Broytman", "email": "phd@phd.pp.ru", "generator": os.path.basename(sys.argv[0]), "posts": items, } # For english dates locale.setlocale(locale.LC_TIME, 'C') atom_tmpl = str(atom_10(searchList=[namespace])) write_if_changed(os.path.join(blog_root, "atom_10.xml"), atom_tmpl) rss_tmpl = str(rss_20(searchList=[namespace])) write_if_changed(os.path.join(blog_root, "rss_20.xml"), rss_tmpl)