Simple RSS syndication
We are going to assume you have already created a project (Hint: you just need an app.yaml configuration file and a main.py file). If you don't know how, please refer to one of my earlier blog posts (above) or the references (below). In your main.py file add the following:
# The webapp2 framework
import webapp2
# The minidom library for XML parsing
from xml.dom.minidom import parseString
# The URL Fetch library
from google.appengine.api import urlfetch
# Fetches the Twitter timeline XML and serves it as an RSS feed
class MainPage(webapp2.RequestHandler):
    """Request handler that republishes a Twitter timeline as RSS."""

    # Respond to a HTTP GET request
    def get(self):
        """Fetch the timeline, convert it with outputRSS() and write the feed.

        If fetching, parsing or converting fails, a short HTML error page
        is written instead of the feed.
        """
        # Imported locally so the handler does not depend on a file-level
        # import being present
        from xml.parsers.expat import ExpatError

        try:
            # Grabs the XML
            result = urlfetch.fetch('https://api.twitter.com/1/statuses/user_timeline.xml?screen_name=almightyolive&count=10&trim_user=true')
            # Parses the document and builds the complete feed before
            # anything is written, so a failure cannot leave a partial feed
            feed = outputRSS(parseString(result.content))
        # Our exception code: bad data, malformed XML or a failed fetch
        except (TypeError, ValueError, ExpatError, urlfetch.Error):
            self.response.out.write("<html><body><p>Invalid inputs</p></body></html>")
            return
        # Declare the RSS media type so clients treat the body as a feed
        self.response.headers['Content-Type'] = 'application/rss+xml; charset=utf-8'
        # Outputs the RSS; nothing may follow the closing </rss> tag, so no
        # HTML trailer is written after it
        self.response.out.write(feed)
# Output the statuses as an RSS 2.0 document
def outputRSS(xml):
    """Build an RSS 2.0 document from a parsed Twitter timeline.

    xml is a DOM document (as returned by minidom's parseString). Every
    <status> element in it becomes one <item>: the item's title is the
    text of the status's <created_at> child and its description is the
    text of the <text> child. A missing or empty child yields an empty
    string rather than an error.

    Returns the whole feed as a single string.
    """
    # Imported locally so the function stands on its own
    from xml.sax.saxutils import escape

    def node_text(parent, tag):
        # XML-escaped text of the first <tag> under parent ("" if absent)
        nodes = parent.getElementsByTagName(tag)
        if not nodes or nodes[0].firstChild is None:
            return ""
        # escape() keeps '&', '<' and '>' from breaking the feed; the data
        # is deliberately not passed through str(), which fails on
        # non-ASCII text under Python 2
        return escape(nodes[0].firstChild.data)

    # Set up our XML return: the channel header comes first
    parts = [
        "<?xml version='1.0'?>\n<rss version='2.0'>\n\t<channel>",
        "\n\t\t<title>Almightyolive Twitter</title>\n\t\t",
        "<link>https://twitter.com/#!/almightyolive</link>\n",
        "\t\t<description>The twitter feed</description>",
    ]
    # Cycle through the statuses, one <item> each
    for status in xml.getElementsByTagName("status"):
        text = node_text(status, "text")
        date = node_text(status, "created_at")
        parts.append("\n\t\t<item>\n\t\t\t<title>" + date + "</title>\n")
        parts.append("\t\t\t<link>https://twitter.com/#!/almightyolive</link>\n\t\t")
        parts.append("\t<description>" + text + "</description>\n\t\t</item>")
    # Close the channel and the document
    parts.append("\n\t</channel>\n</rss>")
    return "".join(parts)
# Build the WSGI application object. Its route list has a single entry
# that hands requests matching '/*' to the MainPage handler.
app = webapp2.WSGIApplication(
    [
        ('/*', MainPage),
    ],
    debug=True,
)
This is a very simple feed; you won't get any nice links and clicking on a particular item will just take you to the main feed. Now to tweak it just a little....
RSS with links
Now we will add a function that will add appropriate links to our tweets. Note that this is a really, really dumb function: it will apply to ANY instances of 'http' or '@' in a word, so it will accidentally affect emails or tweets about the HTTP protocol. I leave it up to you to fix the code if you don't want these things to happen. Anyway, replace main.py with the following code:
# The webapp2 framework
import webapp2
# The minidom library for XML parsing
from xml.dom.minidom import parseString
# The URL Fetch library
from google.appengine.api import urlfetch
# Fetches the Twitter timeline XML and serves it as an RSS feed
class MainPage(webapp2.RequestHandler):
    """Request handler that republishes a Twitter timeline as RSS."""

    # Respond to a HTTP GET request
    def get(self):
        """Fetch the timeline, convert it with outputRSS() and write the feed.

        If fetching, parsing or converting fails, a short HTML error page
        is written instead of the feed.
        """
        # Imported locally so the handler does not depend on a file-level
        # import being present
        from xml.parsers.expat import ExpatError

        try:
            # Grabs the XML
            result = urlfetch.fetch('https://api.twitter.com/1/statuses/user_timeline.xml?screen_name=almightyolive&count=10&trim_user=true')
            # Parses the document and builds the complete feed before
            # anything is written, so a failure cannot leave a partial feed
            feed = outputRSS(parseString(result.content))
        # Our exception code: bad data, malformed XML or a failed fetch
        except (TypeError, ValueError, ExpatError, urlfetch.Error):
            self.response.out.write("<html><body><p>Invalid inputs</p></body></html>")
            return
        # Declare the RSS media type so clients treat the body as a feed
        self.response.headers['Content-Type'] = 'application/rss+xml; charset=utf-8'
        # Outputs the RSS
        self.response.out.write(feed)
# Output the statuses as an RSS 2.0 document with linked tweets
def outputRSS(xml):
    """Build an RSS 2.0 document from a parsed Twitter timeline.

    xml is a DOM document (as returned by minidom's parseString). Every
    <status> element in it becomes one <item>:

    * title       - text of the status's <created_at> child
    * link        - the tweet's own page, built from its first <id> child
    * description - text of the <text> child, with each whitespace-separated
                    word passed through linkify()

    The description therefore carries HTML, which is XML-escaped so that it
    travels as text inside the feed instead of as stray child elements. A
    missing or empty child yields an empty string rather than an error.

    Returns the whole feed as a single string.
    """
    # Imported locally so the function stands on its own
    from xml.sax.saxutils import escape

    def node_text(parent, tag):
        # Raw text of the first <tag> under parent ("" if absent or empty).
        # The data is deliberately not passed through str(), which fails on
        # non-ASCII text under Python 2.
        nodes = parent.getElementsByTagName(tag)
        if not nodes or nodes[0].firstChild is None:
            return ""
        return nodes[0].firstChild.data

    # Our return string: the channel header comes first
    parts = [
        "<?xml version='1.0'?>\n<rss version='2.0'>\n\t<channel>",
        "\n\t\t<title>Almightyolive Twitter</title>\n\t\t",
        "<link>https://twitter.com/#!/almightyolive</link>\n",
        "\t\t<description>The twitter feed for the Almighty ",
        "Olive</description>",
    ]
    # Cycle through the statuses, one <item> each
    for status in xml.getElementsByTagName("status"):
        date = escape(node_text(status, "created_at"))
        tweet = escape(node_text(status, "id"))
        # Insert links word by word, recompile the words, then escape the
        # resulting HTML so the feed stays well-formed XML
        words = node_text(status, "text").split()
        text = escape(" ".join(linkify(word) for word in words))
        # Creates our output
        parts.append("\n\t\t<item>\n\t\t\t<title>" + date + "</title>\n")
        parts.append("\t\t\t<link>https://twitter.com/AlmightyOlive/status/" + tweet)
        parts.append("</link>\n\t\t\t<description>" + text + "</description>\n")
        parts.append("\t\t</item>")
    # Close the channel and the document
    parts.append("\n\t</channel>\n</rss>")
    return "".join(parts)
# Detects if a word is a URL or an @mention and adds the HTML link tags
def linkify(text):
    """Wrap a single word in an HTML anchor when it looks like a link.

    A word containing "http" is linked to itself. Otherwise, a word
    containing "@" is linked to the Twitter page named by whatever follows
    the first "@" (up to the next "@", if any). Any other word, or one with
    nothing after the "@", is returned unchanged.

    The matching is deliberately simple: it works on substrings, so e-mail
    addresses and words that merely mention "http" are linked as well.
    """
    # If http is present, link the word to itself
    if "http" in text:
        return "<a href='" + text + "'>" + text + "</a>"
    if "@" in text:
        handle = text.split("@")[1]
        # A bare "@" names nobody, so leave the word alone
        if not handle:
            return text
        # The original word is kept as the visible label; it must not be
        # overwritten while the tag is being assembled
        return "<a href='http://twitter.com/#!/" + handle + "'>" + text + "</a>"
    return text
# Build the WSGI application object. Its route list has a single entry
# that hands requests matching '/*' to the MainPage handler.
app = webapp2.WSGIApplication(
    [
        ('/*', MainPage),
    ],
    debug=True,
)
And there we have it; the linkify() function will add links into our tweets to make them more usable and accessible!
References
- Google's own getting started with webapp and Python.
- The official webapp2 reference
- The Google developer resource for GAE
- Google App Engine FAQs
- YAML reference
- app.yaml reference
- RSS 2.0 at Harvard Law
- Mark Nottingham's blog post 'RSS Tutorial'
- RSS specifications home-page
- Twitter API reference to get a user's status feed
- Python string reference
- Tutorialspoint string tutorial and reference
No comments:
Post a Comment
Thanks for contributing!! Try to keep on topic and please avoid flame wars!!