recepta na Joe Monstera

unvee

Na szybciutko zrobiłem receptę na joemonster.org, więc się podzielę, bo o dziwo działa ;)

wersja 0.1
+ zmiana obrazków na skalę szarości (grayscale) + kompresja, tak żeby zmniejszyć rozmiar pliku wyjściowego


	Kod: from calibre.web.feeds.news import BasicNewsRecipe from calibre.utils.magick import Image import re class joe(BasicNewsRecipe): title = u'Joe Monster' use_embedded_content = False oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True remove_javascript = True conversion_options = {'linearize_tables': True } feeds = [(u'JoeMonster', u'http://www.joemonster.org/backend.php')] def print_version(self,url): aut = re.compile('http://www.joemonster.org/art/(\d+)') res = aut.search(url) link = res.groups()[0] return url.replace(url, 'http://www.joemonster.org/print.php?sid=' + link) def postprocess_html(self, soup, first): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): iurl = tag['src'] img = Image() img.open(iurl) if img < 0: raise RuntimeError('Out of memory') img.type = "GrayscaleMatteType" img.set_compression_quality(40) img.save(iurl) return soup

wersja 0.2
+ na podstawie http://www.mobileread.com/forums/showpo ... stcount=10 recepta pobiera tylko nowe artykuły


	Kod: from calibre.web.feeds.news import BasicNewsRecipe from calibre.utils.magick import Image from calibre.constants import config_dir, CONFIG_DIR_MODE import os, os.path, urllib from hashlib import md5 import re class joeLast(BasicNewsRecipe): title = u'Joe Monster new' use_embedded_content = False oldest_article = 10000 max_articles_per_feed = 10000 feeds = [ ] no_stylesheets = True remove_javascript = True conversion_options = {'linearize_tables': True } feeds = [(u'JoeMonster', u'http://www.joemonster.org/backend.php')] def print_version(self,url): aut = re.compile('http://www.joemonster.org/art/(\d+)') res = aut.search(url) link = res.groups()[0] return url.replace(url, 'http://www.joemonster.org/print.php?sid=' + link) def postprocess_html(self, soup, first): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): iurl = tag['src'] img = Image() img.open(iurl) if img < 0: raise RuntimeError('Out of memory') img.type = "GrayscaleMatteType" img.set_compression_quality(40) img.save(iurl) return soup def parse_feeds(self): recipe_dir = os.path.join(config_dir,'recipes') hash_dir = os.path.join(recipe_dir,'recipe_storage') feed_dir = os.path.join(hash_dir,self.title.encode('utf-8').replace('/',':')) if not os.path.isdir(feed_dir): os.makedirs(feed_dir,mode=CONFIG_DIR_MODE) feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: feed_hash = urllib.quote(feed.title.encode('utf-8'),safe='') feed_fn = os.path.join(feed_dir,feed_hash) past_items = set() if os.path.exists(feed_fn): with file(feed_fn) as f: for h in f: past_items.add(h.strip()) cur_items = set() for article in feed.articles[:]: item_hash = md5() if article.content: item_hash.update(article.content.encode('utf-8')) if article.summary: item_hash.update(article.summary.encode('utf-8')) item_hash = item_hash.hexdigest() if article.url: item_hash = article.url + ':' + item_hash cur_items.add(item_hash) if item_hash in past_items: feed.articles.remove(article) with 
file(feed_fn,'w') as f: for h in cur_items: f.write(h+'\n') remove = [f for f in feeds if len(f) == 0 and self.remove_empty_feeds] for f in remove: feeds.remove(f) return feeds

wersja 0.21
+ poprawki, żeby pobierał wszystkie artykuły, zmiana okładki


	Kod: from calibre.web.feeds.news import BasicNewsRecipe from calibre.utils.magick import Image from calibre.constants import config_dir, CONFIG_DIR_MODE import os, os.path, urllib from hashlib import md5 import re class joeLast(BasicNewsRecipe): title = u'Joe Monster' description = u'Niecodziennik Satyryczno Prowokujący' simultaneous_downloads = 20 publication_type = 'newspaper' use_embedded_content = False oldest_article = 10000 max_articles_per_feed = 10000 no_stylesheets = True remove_javascript = False conversion_options = {'linearize_tables': True} feeds = [(u'JoeMonster', u'http://www.joemonster.org/backend.php')] cover_url = 'http://www.joemonster.org/images/logo.gif' def print_version(self,url): aut = re.compile('http://www.joemonster.org/art/(\d+)') res = aut.search(url) link = res.groups()[0] return url.replace(url, 'http://www.joemonster.org/print.php?sid=' + link) def preprocess_html(self, soup): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): tag['src'] = tag['src'].replace("../","") return soup def postprocess_html(self, soup, first): for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): iurl = tag['src'] img = Image() img.open(iurl) if img < 0: raise RuntimeError('Out of memory') img.type = "GrayscaleMatteType" img.set_compression_quality(40) img.save(iurl) return soup def parse_feeds(self): recipe_dir = os.path.join(config_dir,'recipes') hash_dir = os.path.join(recipe_dir,'recipe_storage') feed_dir = os.path.join(hash_dir,self.title.encode('utf-8').replace('/',':')) if not os.path.isdir(feed_dir): os.makedirs(feed_dir,mode=CONFIG_DIR_MODE) feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: feed_hash = urllib.quote(feed.title.encode('utf-8'),safe='') feed_fn = os.path.join(feed_dir,feed_hash) past_items = set() if os.path.exists(feed_fn): with file(feed_fn) as f: for h in f: past_items.add(h.strip()) cur_items = set() for article in feed.articles[:]: item_hash = md5() if 
article.content: item_hash.update(article.content.encode('utf-8')) if article.summary: item_hash.update(article.summary.encode('utf-8')) item_hash = item_hash.hexdigest() if article.url: item_hash = article.url + ':' + item_hash cur_items.add(item_hash) if item_hash in past_items: feed.articles.remove(article) with file(feed_fn,'w') as f: for h in cur_items: f.write(h+'\n') remove = [f for f in feeds if len(f) == 0 and self.remove_empty_feeds] for f in remove: feeds.remove(f) return feeds

recepta na Joe Monstera

Kto przegląda forum