Saturday, May 28, 2011

Data fitting using fmin

We have seen already how to find the minimum of a function using fmin, in this example we will see how use it to fit a set of data with a curve minimizing an error function:
from pylab import *
from numpy import *
from numpy.random import normal
from scipy.optimize import fmin

# parametric function, x is the independent variable
# and c are the parameters.
# it's a polynomial of degree 2
fp = lambda c, x: c[0]+c[1]*x+c[2]*x*x
real_p = rand(3)

# error function to minimize
e = lambda p, x, y: (abs((fp(p,x)-y))).sum()

# generating data with noise
n = 30
x = linspace(0,1,n)
y = fp(real_p,x) + normal(0,0.05,n)

# fitting the data with fmin
p0 = rand(3) # initial parameter value
p = fmin(e, p0, args=(x,y))

print 'estimater parameters: ', p
print 'real parameters: ', real_p

xx = linspace(0,1,n*3)
plot(x,y,'bo', xx,fp(real_p,xx),'g', xx, fp(p,xx),'r')

show()
The following figure will be showed, in green the original curve used to generate the noisy data, in blue the noisy data and in red the curve found in the minimization process:

The parameters will be printed also:
Optimization terminated successfully.
         Current function value: 0.861885
         Iterations: 77
         Function evaluations: 146
estimater parameters:  [ 0.92504602  0.87328979  0.64051926]
real parameters:  [ 0.86284356  0.95994753  0.67643758]

Friday, May 27, 2011

Delaunay triangulation with matplotlib

How to plot the delaunay triangulation for a set of points in the plane using matplotlib:
import matplotlib.delaunay as triang
import pylab
import numpy

# 10 random points (x,y) in the plane
x,y =  numpy.array(numpy.random.standard_normal((2,10)))
cens,edg,tri,neig = triang.delaunay(x,y)

for t in tri:
 # t[0], t[1], t[2] are the points indexes of the triangle
 t_i = [t[0], t[1], t[2], t[0]]
 pylab.plot(x[t_i],y[t_i])

pylab.plot(x,y,'o')
pylab.show()
The output will be similar to this:

Wednesday, May 25, 2011

Pickling: How to serialize objects

There is an example on how to serialize and de-serialize python objects.
import pickle
import random

print 'Write data...'
output = open('mydata.pkl', 'wb')
for i in range(0,5): # writing five lists on the file
 list = random.sample(range(0,100),10) # random integers list
 print 'saving:',list
 pickle.dump(list, output)
output.close()

print 'Load data...'
input = open("mydata.pkl","rb") # open the file in reading mode
try:
 while True: # load from the file until EOF is reached
  list = pickle.load(input)
  print 'loaded: ',list
except EOFError:
  print 'End of file reached'
input.close()
This is the output:
Write data...
saving: [15, 10, 85, 65, 17, 31, 19, 2, 68, 44]
saving: [96, 2, 27, 90, 99, 66, 31, 97, 51, 12]
saving: [82, 61, 12, 49, 50, 84, 91, 83, 23, 89]
saving: [83, 28, 85, 75, 14, 68, 96, 58, 5, 66]
saving: [55, 73, 1, 24, 29, 92, 58, 96, 41, 10]
Load data...
loaded:  [15, 10, 85, 65, 17, 31, 19, 2, 68, 44]
loaded:  [96, 2, 27, 90, 99, 66, 31, 97, 51, 12]
loaded:  [82, 61, 12, 49, 50, 84, 91, 83, 23, 89]
loaded:  [83, 28, 85, 75, 14, 68, 96, 58, 5, 66]
loaded:  [55, 73, 1, 24, 29, 92, 58, 96, 41, 10]
End of file reached

Monday, May 23, 2011

Four ways to compute the Google Pagerank

As described in THE $25,000,000,000 EIGENVECTOR THE LINEAR ALGEBRA BEHIND GOOGLE, we can compute the score of a page on a web as the maximal eigenvector of the matrix

Google

where A is the scaled connectivity matrix of a web, S is an n × n matrix with all entries 1/n and m is a real number between 0 and 1.

Here's implemented four ways to compute the maximal eigenvector of the matrix using the numpy:
from numpy import *

def powerMethodBase(A,x0,iter):
 """ basic power method """
 for i in range(iter):
  x0 = dot(A,x0)
  x0 = x0/linalg.norm(x0,1)
 return x0

def powerMethod(A,x0,m,iter):
 """ power method modified to compute
     the maximal real eigenvector 
     of the matrix M built on top of the input matrix A """
 n = A.shape[1]
 delta = m*(array([1]*n,dtype='float64')/n) # array([1]*n is [1 1 ... 1] n times
 for i in range(iter):
  x0 = dot((1-m),dot(A,x0)) + delta
 return x0

def maximalEigenvector(A):
 """ using the eig function to compute eigenvectors """
 n = A.shape[1]
 w,v = linalg.eig(A)
 return abs(real(v[:n,0])/linalg.norm(v[:n,0],1))

def linearEquations(A,m):
 """ solving linear equations 
     of the system (I-(1-m)*A)*x = m*s """
 n = A.shape[1]
 C = eye(n,n)-dot((1-m),A)
 b = m*(array([1]*n,dtype='float64')/n)
 return linalg.solve(C,b)

def getTeleMatrix(A,m):
 """ return the matrix M
     of the web described by A """
 n = A.shape[1]
 S = ones((n,n))/n
 return (1-m)*A+m*S

A = array([ [0,     0,     0,     1, 0, 1],
            [1/2.0, 0,     0,     0, 0, 0],
            [0,     1/2.0, 0,     0, 0, 0],
            [0,     1/2.0, 1/3.0, 0, 0, 0],
            [0,     0,     1/3.0, 0, 0, 0],
            [1/2.0, 0,     1/3.0, 0, 1, 0 ] ])

n = A.shape[1] # A is n x n
m = 0.15
M = getTeleMatrix(A,m)

x0 = [1]*n
x1 = powerMethod(A,x0,m,130)
x2 = powerMethodBase(M,x0,130)
x3 = maximalEigenvector(M)
x4 = linearEquations(A,m)

# comparison of the four methods
labels = range(1,6)
print array([labels, x1, x2, x3, x4]).T

The matrix A used to the test the program describe the following web


The scores are (the first column show the labels):
[[ 1.          0.32954577  0.32954577  0.32954577  0.32954577]
 [ 2.          0.16505695  0.16505695  0.16505695  0.16505695]
 [ 3.          0.0951492   0.0951492   0.0951492   0.0951492 ]
 [ 4.          0.12210815  0.12210815  0.12210815  0.12210815]
 [ 5.          0.05195894  0.05195894  0.05195894  0.05195894]
 [ 6.          0.23618099  0.23618099  0.23618099  0.23618099]]

Friday, May 20, 2011

Latent Semantic Analysis with Term-Document matrix

This example is inspired by the second paragraph of the paper Matrices, vector spaces, and information retrieval. It shows a vector space representation of information used to represent documents in a collection and the query algorithm to find relevant documents. This example implement the model and the query matching algorithm using the linear algebra module provided by numpy. The program is tested on the sample data in Figure 2 of the paper.
import numpy
def buildTermDocumentMatrix(terms,docs):
 """ build a term-document matrix """
 tlen = len(terms)
 dlen = len(docs)
 A = numpy.zeros((tlen, dlen))

 for i,t in enumerate(terms):
  for j,d in enumerate(docs):
   A[i,j] = d.lower().count(t) # computing terms frequencies

 for i in range(dlen): # normalize columns
  A[:tlen,i] = A[:tlen,i]/numpy.linalg.norm(A[:tlen,i])

 return A

def query(A,q,docs):
 """ make the query and print the result """
 q = q/numpy.linalg.norm(q) # normalize query vector
 for i in range(len(docs)):
  # dot product
  print '-Doc  :',docs[i],'\n-Match:',numpy.dot(A[:6,i].T,q) 

# documents collection
docs =['How to Bake Bread Without Recipes',
'The Classic Art of Viennese Pastry',
'Numerical Recipes: The Art of Scientific Computing',
'Breads, Pastries, Pies and Cakes : Quantity Baking Recipes',
'Pastry: A Book of Best French Recipe']
# interesting terms
terms = ['bak','recipe','bread','cake','pastr','pie']

# will return a matrix 6 terms x 5 documents
A = buildTermDocumentMatrix(terms,docs) 
print 'Normalized Terms-Documents matrix'
print A

print '\n*** Query: "bak(e,ing)" + "bread"'
q1 = numpy.array([1,0,1,0,0,0])
query(A,q1,docs)

print '\n*** Query: "bak(e,ing)" only'
q2 = numpy.array([1,0,0,0,0,0])
query(A,q2,docs)
The results are the same as is the reference paper:
Normalized Terms-Documents matrix
[[ 0.57735027  0.          0.          0.40824829  0.        ]
 [ 0.57735027  0.          1.          0.40824829  0.70710678]
 [ 0.57735027  0.          0.          0.40824829  0.        ]
 [ 0.          0.          0.          0.40824829  0.        ]
 [ 0.          1.          0.          0.40824829  0.70710678]
 [ 0.          0.          0.          0.40824829  0.        ]]

*** Query: "bak(e,ing)" + "bread"
-Doc  : How to Bake Bread Without Recipes 
-Match: 0.816496580928
-Doc  : The Classic Art of Viennese Pastry 
-Match: 0.0
-Doc  : Numerical Recipes: The Art of Scientific Computing 
-Match: 0.0
-Doc  : Breads, Pastries, Pies and Cakes : Quantity Baking Recipes 
-Match: 0.57735026919
-Doc  : Pastry: A Book of Best French Recipe 
-Match: 0.0

*** Query: "bak(e,ing)" only
-Doc  : How to Bake Bread Without Recipes 
-Match: 0.57735026919
-Doc  : The Classic Art of Viennese Pastry 
-Match: 0.0
-Doc  : Numerical Recipes: The Art of Scientific Computing 
-Match: 0.0
-Doc  : Breads, Pastries, Pies and Cakes : Quantity Baking Recipes 
-Match: 0.408248290464
-Doc  : Pastry: A Book of Best French Recipe 
-Match: 0.0
Other resources about about the model implemented can be found here:

Wednesday, May 18, 2011

Function with arbitrary argument lists

This example shows how to define a function with arbitrary argument list.
def sum(*addends):
 s = 0
 for n in addends:
  s += n
 return s
So, now we can call the function with different numbers of argument:
print sum(1,2,3,4)
10
print sum(1,2,3)
6

Tuesday, May 17, 2011

How to interpolate a set of points

The purpose of this example is to show how to interpolate a set of points (x,y) using the funtion interp1 provided by scipy.
import scipy.interpolate as sp
import numpy
import pylab

# 50 points of sin(x) in [0 10]
xx = numpy.linspace(0, 10, 50)
yy = numpy.sin(xx)

# 10 sample of sin(x) in [0 10]
x = numpy.linspace(0, 10, 10)
y = numpy.sin(x)

# interpolation
fl = sp.interp1d(x, y,kind='linear')
fc = sp.interp1d(x, y,kind='cubic')

# fl and fc are the interpolating functions
# defined in the interval [0 10]
# fl uses linear interpolation
# and fc uses cubic interpolation

xnew = numpy.linspace(0, 10, 50)
pylab.subplot(211)
# the real sin(x) function plot
pylab.plot(xx, yy)
pylab.legend(['sin(x)'], loc='best')
pylab.subplot(212)
# the interpolation
pylab.plot(x, y, 'o', xnew, fl(xnew), xnew, fc(xnew))
pylab.legend(['sample', 'linear', 'cubic'], loc='lower left')
pylab.show()
The script will show the following figure:

Monday, May 16, 2011

How to read csv

How to read a csv (comma separated values) file:
import csv
reader = csv.reader(open('values.csv','rb'))
for row in reader:
 print row
The script will print a list for each line in the csv file
$ python csvread.py 
['0', '0', '5', '385', '0', '209', '0', '0', '0', '2']
['0', '0', '17', '30', '0', '5', '7', '6', '0', '0']
['11', '0', '97', '468', '0', '338', '28', '0', '0', '3']
$ cat values.csv 
0,0,5,385,0,209,0,0,0,2
0,0,17,30,0,5,7,6,0,0
11,0,97,468,0,338,28,0,0,3

How to create an Irc echo bot

The example shows how to connect to an irc server and how to read and send data from the server.
import socket

def reply(privmsg, socket):
 """ decode the string with message,
     something like ':nickname!~hostname PRIVMSG my_nickname :hi'
     and echoes the message received to nickname """
 nick = privmsg[1:privmsg.find('!')]
 msg = privmsg[privmsg.find(':',1,len(privmsg))+1:len(privmsg)] 
 socket.send('PRIVMSG '+nick+' :'+msg) # sending to the socket


print 'Connecting...'
s = socket.socket()
s.connect(('irc.freenode.net',6667)) #connection to the irc server
s.send('NICK GlowPy\n')
s.send('USER PythonBot my.host.name humm : My Real Name\n')

while True:
 data = s.recv(1024) # reading from the socket
 print data
 if data.find('PRIVMSG') > 0: # if the string is a message
  reply(data,s)
A conversation with the bot:
<JustGlowing> hi there!
<GlowPy> hi there!
<JustGlowing> how are you?
<GlowPy> how are you?

Wednesday, May 11, 2011

How to download the profile picture of a facebook user

The follwing function uses the facebook graph API to retrieve the url of the profile picture from the user's id:
import urllib
import simplejson

def getProfilePicUrl(user_id):
 api_query = urllib.urlopen('https://graph.facebook.com/'+user_id)
 dict = simplejson.loads(api_query.read())
 return dict['picture']

When we visit a facebook profile the user id is displayed in the address of the page. This is the address of the cocacola page, in red the user id of coca cola:

http://www.facebook.com/profile.php?id=40796308305

now can use the id to save the profile picture of coca cola.

pic_url = getProfilePicUrl('40796308305')
pic = urllib.urlopen(pic_url) # retrieve the picture
f = open("cocacola.jpg","wb")
f.write(pic.read()) # save the pic
f.close()
The script will save the picture on the disk.

Warning: coca cola has a public profile, non-public profile need authentication.

Tuesday, May 10, 2011

How to find the intersection of two functions

Previously we have seen how to find roots of a function with fsolve, in this example we use fsolve to find an intersection between two functions, sin(x) and cos(x):
from scipy.optimize import fsolve
import pylab
import numpy

def findIntersection(fun1,fun2,x0):
 return fsolve(lambda x : fun1(x) - fun2(x),x0)

result = findIntersection(numpy.sin,numpy.cos,0.0)
x = numpy.linspace(-2,2,50)
pylab.plot(x,numpy.sin(x),x,numpy.cos(x),result,numpy.sin(result),'ro')
pylab.show()
In the graph we can see sin(x) (blue), cos(x) (green) and the intersection found (red dot) starting from x = 0.

Monday, May 9, 2011

How to find the roots of a function with fsolve

The function fsolve provided by numpy return the roots of the (non-linear) equations defined by func(x) = 0 given a starting estimate. We will see how to use fsolve to find the root of the function

from scipy.optimize import fsolve
import pylab
import numpy

pow3 = lambda x : x**3

result = fsolve(pow3,10) # starting from x = 10
print result
x = numpy.linspace(-1,1,50)
pylab.plot(x,pow3(x),result,pow3(result),'ro')
pylab.grid(b=1)
pylab.show()
In the following graph we can see f(x) (blue curve) and the solution found (red dot)

Friday, May 6, 2011

How to use string's Template

Templates provide simple string substitutions mechanism, here's a simple demonstration:
from string import Template

template = Template('The president of $state is $name')
message = template.substitute(state='USA', name='Obama')
print '1.',message
message = template.substitute(state='France', name='Sarkozy')
print '2.',message

try:
 # will raise an exception
 message = template.substitute(state='England')
except Exception as e:
 print 'I cannot fill the placeholder',e
 #  original name placeholder will be used
 message = template.safe_substitute(state='England') 

print '3.',message
The output of this script will be
1. The president of USA is Obama
2. The president of France is Sarkozy
I cannot fill the placeholder 'name'
3. The president of England is $name

Thursday, May 5, 2011

QR decomposition with numpy

We will see how to compute the QR decomposition of a matrix A and how to use Q and R to solve the linear equation system Ax=b using the from described here.
from numpy import *

A = floor(random.rand(4,4)*20-10) # random matrix

Q,R = linalg.qr(A) # qr decomposition of A

b = floor(random.rand(4,1)*20-10)
# solve Ax = b using the standard numpy function
x = linalg.solve(A,b)

# solve Ax = b using Q and R
y = dot(Q.T,b)
xQR = linalg.solve(R,y) 

print "\nSolution compared"
print x.T,'Ax=b'
print xQR.T,'Rx=y'
And now we can compare the solutions obtained
Solution compared
[[ 0.69207502  0.05565638  0.68965517 -0.2183908 ]] Ax=b
[[ 0.69207502  0.05565638  0.68965517 -0.2183908 ]] Rx=y

Tuesday, May 3, 2011

How to synchronize threads using locks

In this post we will extend a previous example about multithreading. Here's how to synchronize two threads using a simple lock:
import threading
import thread
import time

class MyThread(threading.Thread):
 def __init__(self, myName, lock):
  threading.Thread.__init__(self)
  self.myName = myName
  self.lock = lock

 def run(self):
  while True:
   self.lock.acquire()
   print self.myName,'is in the critical section the lock'
   time.sleep(1) # wait 1 second
   print self.myName,'releasing the lock'
   self.lock.release()
   

if __name__=="__main__":
 lock=thread.allocate_lock()
 thread1 = MyThread("1",lock)
 thread1.start()
 thread2 = MyThread("2",lock)
 thread2.start()
 while True: pass
A thread can't print in the console until he acquires the lock. The output will be similar to this:
1 is in the critical section the lock
1 releasing the lock
2 is in the critical section the lock
2 releasing the lock
1 is in the critical section the lock
1 releasing the lock
2 is in the critical section the lock
2 releasing the lock
2 is in the critical section the lock
2 releasing the lock
1 is in the critical section the lock
1 releasing the lock
2 is in the critical section the lock
2 releasing the lock
...
WARNING: This is a simple example with two threads and only one critical section, more complicated situation need other synchronization mechanisms.

Monday, May 2, 2011

How to create a chart with Google Chart API

The example shows how to create a scatter plot using the Google Chart API.
import random
import urllib

def list2String(x):
 """ from a list like [1,2,5]
     return a string like '1,2,5' """
 data = ""
 for i in x:
  data += str(i)+","
 return data[0:len(data)-1]

def makeChart(x,y,filename):
 query_url = "http://chart.apis.google.com/chart?chxt=x,y&chs=300x200&cht=s&chd=t:"
 query_url += list2String(x)+"|"+list2String(y)
 chart = urllib.urlopen(query_url) # retrieve the chart
 print "saving",query_url
 f = open(filename,"wb")
 f.write(chart.read()) # save the pic
 f.close()

x = random.sample(range(0,100),10) # list with
y = random.sample(range(0,100),10) # random values in [0 100[
makeChart(x,y,"chart.png")
You can embed the picture in a web page:
<img alt="Google chart example" src="http://chart.apis.google.com/chart?chxt=x,y&amp;chs=300x200&amp;cht=s&amp;chd=t:64,10,18,42,49,83,73,27,44,51|77,89,13,87,27,34,38,44,22,42" />
Or use it from the disk.

Google chart example