はてなブログ投稿自動化 メモ
はてなブログ投稿自動化 メモ
hatena blog API (Atom)
Doc: https://kanaxx.hatenablog.jp/entry/hatena-entry-update
注意点
- `<updated>` タグを入れないと、更新日がスクリプト実行日に変わってしまう。作成日も一緒に変わってしまう。
- `<category>` タグを常に入れて送らないとカテゴリーがクリアされてしまう。
- 送信するxmlデータに埋め込む記事本文はエスケープが必要 (エラーレスポンスの内容ではこれが原因だとすぐ分からない)。
サンプルソース群 (python)
- 送信する記事データ(xml)作成
from datetime import datetime
from xml.sax.saxutils import escape

# Atom entry payload for the Hatena Blog API.
# <updated> and <category> are always included: per the notes in this memo,
# omitting <updated> resets the entry dates to the script execution time and
# omitting <category> clears the entry's categories.
__BLOG_ENTRY_TEMPLATE = """<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom" xmlns:app="http://www.w3.org/2007/app">
  <title>{title}</title>
  <author><name>{author}</name></author>
  <content type="text/x-markdown">{content}</content>
  <updated>{update_time}</updated>
  <category term="{category}" />
  <app:control>
    <app:draft>{draft}</app:draft>
  </app:control>
</entry>"""

# Timestamp layout used for the <updated> element (no UTC offset).
ENTRY_DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


def resolve_entry_current_time() -> str:
    """Return the current local time formatted with ENTRY_DATE_TIME_FORMAT."""
    return datetime.now().strftime(ENTRY_DATE_TIME_FORMAT)


def __replace_xml_escape(content: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *content* is safe as XML text."""
    return escape(content)


def __replace_xml_attr_escape(value: str) -> str:
    """Escape *value* for use inside a double-quoted XML attribute."""
    # The template wraps the attribute in double quotes, so a literal quote
    # in the value must be escaped in addition to &, < and >.
    return escape(value, {'"': "&quot;"})


def build_hatena_blog_entry_xml_body(hatena_id: str, title: str, category: str, content: str,
                                     is_draft: bool = True) -> str:
    """Build the Atom XML request body for one blog entry.

    Every caller-supplied value is XML-escaped before being embedded, so
    characters such as ``&``, ``<`` or ``"`` in the title, author or category
    no longer produce a malformed document.

    Args:
        hatena_id: Hatena ID written into ``<author><name>``.
        title: Entry title.
        category: Category written into ``<category term="...">``.
        content: Entry body (sent as ``text/x-markdown``).
        is_draft: When True the entry is marked as a draft.

    Returns:
        The complete XML document as a string.
    """
    entry_xml = __BLOG_ENTRY_TEMPLATE.format(
        title=__replace_xml_escape(title),
        author=__replace_xml_escape(hatena_id),
        content=__replace_xml_escape(content),
        update_time=resolve_entry_current_time(),
        category=__replace_xml_attr_escape(category),
        draft='yes' if is_draft else 'no',  # app:draft accepts "yes" or "no"
    )
    return entry_xml
- リクエストヘッダ生成(wsse使用)
import base64
import hashlib
import random
import secrets
from datetime import datetime, timezone


class HatenaBlogApiExecutor:
    """Builds authenticated request headers for the Hatena Blog API."""

    def __init__(self, blog_config: "BlogConfig"):
        """Keep the blog configuration.

        Args:
            blog_config: Object exposing ``hatena_id`` and ``api_key``.
        """
        self.__blog_conf = blog_config

    def build_request_header(self):
        """Return the request headers carrying a fresh WSSE token.

        Returns:
            A dict with a single ``X-WSSE`` entry.
        """
        def __build_wsse(blog_config: "BlogConfig"):
            user_name = blog_config.hatena_id
            api_key = blog_config.api_key
            # "Z" designates UTC, so the timestamp must actually be taken in
            # UTC (the local clock would be mislabelled outside UTC zones).
            created_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            # Nonce from a cryptographically secure source; 20 bytes matches
            # the size of the SHA-1 digest used previously.
            b_nonce = secrets.token_bytes(20)
            # PasswordDigest = Base64(SHA1(nonce + created + api_key))
            b_password_digest = hashlib.sha1(
                b_nonce + created_time.encode() + api_key.encode()
            ).digest()
            password_digest = base64.b64encode(b_password_digest).decode()
            nonce = base64.b64encode(b_nonce).decode()
            # Values are double-quoted, as in the usual WSSE UsernameToken form.
            return (
                f'UsernameToken Username="{user_name}", '
                f'PasswordDigest="{password_digest}", '
                f'Nonce="{nonce}", '
                f'Created="{created_time}"'
            )

        return {'X-WSSE': __build_wsse(self.__blog_conf)}
- レスポンス解析ソース (xmlから欲しい情報だけ抜き出し ※突貫コード)
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import List, Optional


# for debug
def print_xml_children(root: ET.Element):
    """Print the tag of every direct child of *root* (debug helper)."""
    for child in root:
        print(child.tag)


def __get_tag_head(root: ET.Element, root_tag: str = 'feed') -> str:
    """Return the namespace prefix of *root*'s tag, e.g. ``{http://www.w3.org/2005/Atom}``.

    Args:
        root: Element whose tag looks like ``{namespace}name``.
        root_tag: Local name the root element is expected to have.

    Returns:
        The tag with the local name removed; an empty string when the tag
        carries no namespace.
    """
    tag = root.tag  # tag example: {http://www.w3.org/2005/Atom}feed
    if tag.endswith(root_tag):
        return tag[:-len(root_tag)]
    # The root is not the expected element: derive the prefix from the tag
    # itself instead of slicing off an unrelated number of characters.
    namespace, brace, _ = tag.rpartition('}')
    return namespace + brace


def get_next_page_url(xml_string: str) -> Optional[str]:
    """Return the ``href`` of the ``rel="next"`` link in a feed, or None.

    Args:
        xml_string: Feed XML document.
    """
    root = ET.fromstring(xml_string)
    for link in root.iter(__get_tag_head(root) + 'link'):
        # ``rel`` is optional on link elements, so do not index attrib directly.
        if link.get('rel') == 'next':
            return link.get('href')
    return None


def parse_blog_entry_xml(xml_string_opt: Optional[str]) -> Optional["BlogEntry"]:
    """Parse a single-entry XML document into a BlogEntry.

    Args:
        xml_string_opt: Entry XML document, or None.

    Returns:
        The parsed BlogEntry, or None when *xml_string_opt* is None.
    """
    if xml_string_opt is None:
        return None
    root = ET.fromstring(xml_string_opt)
    tag_head = __get_tag_head(root, 'entry')
    return __parse_blog_entry_xml(root, tag_head, [])


def __parse_entry_time(node: Optional[ET.Element]) -> Optional[datetime]:
    """Parse a timestamp element such as ``2013-09-02T11:28:23+09:00``.

    Returns None when the element is missing or has no text.
    """
    if node is None or not node.text:
        return None
    return datetime.strptime(node.text.strip(), "%Y-%m-%dT%H:%M:%S%z")


def __parse_blog_entry_xml(entry_node: ET.Element, tag_head: str,
                           exclude_ids: List[str]) -> Optional["BlogEntry"]:
    """Extract the fields of one entry element into a BlogEntry.

    Args:
        entry_node: The entry element.
        tag_head: Namespace prefix (``{namespace}``) of the entry's child tags.
        exclude_ids: Entry ids to skip.

    Returns:
        A BlogEntry, or None when the entry id is listed in *exclude_ids*.
    """
    # id example: tag:blog.hatena.ne.jp,2013:blog-Sympathia-17680117126980108518-13574176438048806685
    # entry id is last sequence
    entry_id = entry_node.find(tag_head + 'id').text.rsplit('-', 1)[-1]
    if entry_id in exclude_ids:
        return None

    title = entry_node.find(tag_head + 'title').text

    content = ''
    for cont in entry_node.iter(tag_head + 'content'):
        if cont.get('type') == 'text/x-markdown':
            # An empty element has text None; keep content a string.
            content = cont.text or ''
            break

    # Use the later of <updated> and <app:edited>; either may be absent.
    last_update_time = __parse_entry_time(entry_node.find(tag_head + 'updated'))
    app_edited_time = __parse_entry_time(
        entry_node.find('{http://www.w3.org/2007/app}edited'))  # app:edited
    if app_edited_time is not None and (
            last_update_time is None or last_update_time < app_edited_time):
        last_update_time = app_edited_time

    url_link = ''
    for link in entry_node.iter(tag_head + 'link'):
        # Atom treats a link without ``rel`` as rel="alternate".
        if link.get('rel', 'alternate') == 'alternate':
            url_link = link.get('href', '')
            break

    categories = [category.attrib['term']
                  for category in entry_node.iter(tag_head + 'category')]

    return BlogEntry(entry_id, title, content, url_link, last_update_time, categories)