#!/usr/bin/env python
"""
random_sampling.py
Created by Marc-Andre on 2013-03-21.
modified 1 july 2014 for interactive mode.
modified february 2017 for python3 compat.
Copyright (c) 2013 IGBMC. All rights reserved.
Generates random sampling for NUS FT spectroscopy
can be used
as a standalone by answering to the prompted questions
as a library by importing the code
the program displays the results - closing the display will generate the files
The program generates two files.
one with the delay values, typically used for the acquisition process,
and one with index values, typically used for the analysis.
Parameters that define the sampling are :
the size of the complete sampling
SIZE = 20000
the sampling ratio - 0.3 means sampling 30% of the entries
RATIO = 0.2
thus with SIZE = 1000, RATIO = 0.3 300 values out of 1000 will be generated
the dwell time (Nyquist frequency) - used for the delay list
DWELL = 0.0033
the sampling protocol (PROTOCOLE) defines how the random points are chosen.
three different protocols are available : random / poisson / uniform (each one samples P points out of N)
PROTOCOLE = "poisson"
points are sampled so that the gaps between sample points follow a Poisson law of mean $\\mu$ = N/P
PROTOCOLE = "uniform"
points are sampled so that the gaps between sample points follow a uniform law
so all gaps from 1 to N/P are equiprobable
PROTOCOLE = "random"
here points are simply sampled at random, drawing P points out of N uniformly and without replacement
the gap distribution follows an exponential law
HEAD determines a number of points that will be kept linearly sampled in the beginning of the sampling
useful for allowing regular analysis of the data
HEAD = 100
Additional parameters :
The seed of the random generator.
This allows the program to be run several times with reproducible results
The same seed will generate the same values.
Changing the seed will change the values; any integer number will do.
None draws a new distribution each time.
SEED = 1234
file basename of the stored files
FNAME = "Sampling_file"
generates two files :
- the delay list used by the spectrometer - named 'filename'.delay
- the sampling function used for analysis - named 'filename'.list
if PLOT is True, then the program displays the results - closing the display will generate the files
PLOT = True
Use in a program as follows :
import random_sampling as rs
rs.SIZE = 10000
rs.DWELL = 0.005
rs.RATIO = 0.12
rs.FNAME = "My_file" # generates 2 files : My_file.delay and My_file.list
rs.PROTOCOLE = "poisson"
#then either
rs.main() # creates both files - default is no display
#or
sampling = rs.poisson_gap(rs.SIZE, ratio=rs.RATIO)
...
"""
from __future__ import print_function, division
import numpy as np
from scipy.stats import poisson
import time
import unittest
import sys
#
#import spike.Display.testplot as testplot
#plt = testplot.plot()
# these two lines are kind of equivalent to "import matplotlib.pyplot as plt"
import matplotlib.pyplot as plt
# Seed of the random generator.
# A fixed integer makes runs reproducible: the same seed generates the same values,
# and changing the seed changes the values. Any integer will do.
# None means no fixed seed: main() then derives one from the current time.
SEED = None
# Size of the complete (fully sampled) grid
SIZE = 20000
# Dwell time (Nyquist frequency) - used to compute the delay list
DWELL = 0.0033
# Sampling ratio - 0.3 means sampling 30% of the entries;
# thus if SIZE = 1000, only 300 entries will be generated
RATIO = 0.2
# Sampling protocol : random / poisson / uniform
#PROTOCOLE = "uniform"
#PROTOCOLE = "random"
PROTOCOLE = "poisson"
# Base name of the stored files
FNAME = "Sampling_file"
# If PLOT is True, the program displays the results; closing the display then generates the files
PLOT = False
# HEAD is the number of points kept linearly sampled at the beginning of the sampling,
# useful for allowing regular analysis of the data
HEAD = 0
def random_gap(size, ratio=0.5):
    """
    Draw a uniformly random sampling of a grid of 'size' points.

    int(size*ratio) indices are returned as a sorted integer array.
    The first min(HEAD, int(size*ratio)-1) indices are kept linear (0, 1, 2, ...),
    where HEAD is the module-level setting; the remaining ones are drawn
    without replacement among the indices that follow the linear zone.
    The last entry is always forced to size-1.
    """
    n_samples = int(size * ratio)
    n_linear = min(HEAD, n_samples - 1)
    # shuffle every index located after the linear zone
    shuffled = np.random.permutation(size - n_linear) + n_linear
    picked = np.zeros(n_samples, dtype=int)
    # keep only as many shuffled indices as needed to complete the sampling
    picked[n_linear:n_samples] = shuffled[:n_samples - n_linear]
    # the linear zone itself
    picked[0:n_linear] = np.arange(n_linear)
    picked.sort()
    # make sure the sampling always reaches the end of the grid
    picked[n_samples - 1] = size - 1
    return picked
def poisson_gap(size, ratio=0.5):
    """
    Draw a sampling of a grid of 'size' points whose gaps follow a Poisson law.

    int(size*ratio) indices are generated.
    The first min(HEAD, int(size*ratio)-1) indices are kept linear (0, 1, 2, ...),
    where HEAD is the module-level setting; after that, each index is the
    previous one plus a strictly positive Poisson-distributed gap.
    The raw indices are finally rescaled onto 0 .. size-1 by correct(),
    and returned as a numpy array.
    """
    n_samples = int(size * ratio)
    n_linear = min(HEAD, n_samples - 1)
    # mean gap: grid length left after the linear zone / number of points left to place
    mean_gap = (size - n_linear) / (size * ratio - n_linear)
    gap_law = poisson(mean_gap)
    indices = list(range(n_linear))     # the linear zone
    position = n_linear
    while len(indices) != n_samples:
        indices.append(position)
        # redraw until the gap is non-zero, so that two points never coincide
        step = 0
        while step == 0:
            step = gap_law.rvs()
        position += step
    return np.array(correct(indices, size))
def correct_size(sampling, targetsize):
    """
    Trim 'sampling' in place until it holds at most 'targetsize' entries.

    Entries are removed from the end; a sampling that is already short
    enough is left untouched. The (same) sampling object is returned.
    """
    excess = len(sampling) - targetsize
    for _ in range(excess):     # empty range when there is nothing to remove
        sampling.pop()
    return sampling
def correct(sampling, targetsize):
    """
    Rescale 'sampling' so that its entries span 0 .. targetsize-1, without duplicates.

    Every entry is divided by max(sampling)/targetsize, rounded, and clipped
    to targetsize-1. When the rescaled value is already taken, it is
    decremented until a free value is found.

    sampling   : sequence of non-negative indices
    targetsize : size of the grid the indices must fit in

    Returns a new list with one entry per input entry, in the same order
    (an entry moved by the duplicate removal may end up smaller than the
    one before it). An empty sampling returns an empty list.

    Raises Exception when no free value is left for an entry.
    """
    if len(sampling) == 0:
        return []
    top = max(sampling)
    # when every index is 0 there is nothing to rescale - and dividing by 0 must be avoided
    ratio = float(top) / targetsize if top else 1.0
    newsamp = []
    taken = set()       # same content as newsamp, for fast membership tests
    for s in sampling:
        val = min(int(round(s / ratio)), targetsize - 1)
        while val in taken:
            val -= 1
            if val < 0:
                raise Exception("run failed, please rerun")
        taken.add(val)
        newsamp.append(val)
    return newsamp
def write(filename, sampling, dwell, size, ratio, proto):
    """
    Store a sampling into two text files sharing the same commented header :
    - the delay list used by the spectrometer - named 'filename'.delay
      (one line per sampled point, holding dwell * index)
    - the sampling function used for analysis - named 'filename'.list
      (one line per sampled point, holding the index)

    filename : base name of the two files
    sampling : sequence of sampled indices
    dwell    : dwell time, used to convert indices into delays
    size, ratio, proto : only reported in the header

    The header also reports the module-level HEAD and SEED settings.
    """
    # the header is built once, so that both files carry exactly the same one
    header = [
        "#File generated by random_sampling.py\n",
        "# Date : %s\n" % time.ctime(),
        "#\n",
        "# Initial size : %d\n" % size,
        "# sampled size : %d\n" % len(sampling),
        "# sampling ratio: %f\n" % ratio,
        "# Protocole : %s\n" % proto,
        "# Dwell time : %f\n" % dwell,
        '# linear zone : %d\n' % (min(HEAD, len(sampling) - 1)),
        "# seed : %s\n" % SEED,
        "#\n",
    ]
    # 'with' closes both files even if a write fails half-way
    with open(filename + '.delay', 'w') as delay_file:
        with open(filename + '.list', 'w') as list_file:
            delay_file.writelines(header)
            list_file.writelines(header)
            for i in sampling:
                delay_file.write("%f\n" % (dwell * i))
                list_file.write("%d\n" % i)
def realize(size, sampling):
    """
    Build the sampling function as a buffer of 'size' floats :
    1.0 at every index listed in 'sampling', 0.0 everywhere else.
    """
    mask = np.zeros(size)
    mask[sampling] = 1.0    # switch on the sampled points
    return mask
def plotit2(size, sampling):
    """
    Plot a 2x2 summary of the sampling :
    - the gap histogram (top left)
    - the sampling function itself, drawn by plotit() (top right)
    - the modulus of the Point Spread Function, with an estimate of its SNR (bottom left)
    - the sampled index as a function of the point number (bottom right)

    size     : size of the complete grid
    sampling : numpy array of sampled indices
    """
    import numpy.fft as fft
    plt.subplot(221)
    gaps = sampling[1:] - sampling[:-1]
    # bin edges run from 0 to max(gaps)+1, so that every gap value - including
    # the largest one - falls in its own unit-wide bin
    plt.hist(gaps, bins=np.arange(max(gaps) + 2))
    plt.title("gap distribution")
    plt.subplot(223)
    disp = realize(size, sampling)
    psf = fft.fftshift(fft.fft(disp))   # complex psf
    apsf = abs(psf)                     # its modulus
    plt.plot(apsf)
    sig = apsf.max()
    # noise is estimated on the first third of the psf, away from the central peak
    noise = psf[:len(psf)//3].std()
    snr = 20*np.log10(sig/noise)
    plt.text(1, 0.9*sig, "SNR : %.2f dB" % snr)
    plt.title("Point Spread Function of the sampling")
    plt.subplot(224)
    plt.plot(sampling)
    plt.title('cumulative')
    plt.subplot(222)
    plotit(size, sampling)
def plotit(size, sampling):
    """
    Plot the sampling function (1.0 on sampled points, 0.0 elsewhere)
    in the current axes; the title reports the grid size, the module-level
    PROTOCOLE and the effective sampling percentage.
    """
    plt.plot(realize(size, sampling))
    plt.ylim(-0.1, 1.1)     # leave some room around the 0 / 1 levels
    percent = (100.0 * len(sampling)) / size
    plt.title('Measure on %d points, %sly sampled at %.2f %%' % (size, PROTOCOLE, percent))
class Tests(unittest.TestCase):
    """Regression tests of the sampling generation."""
    def test1(self):
        """
        Run main() with the poisson protocol and a fixed seed,
        and compare the sampling with reference values.
        Writes TestFile.delay and TestFile.list as a side effect.
        """
        global PROTOCOLE, SIZE, HEAD, RATIO, DWELL, FNAME, SEED
        PROTOCOLE, SIZE, HEAD = "poisson", 20000, 10
        RATIO, SEED, FNAME = 0.25, 12345, "TestFile"
        sampling = main()
        print( sampling.sum(), sampling.std())
        # reference values obtained with SEED = 12345
        self.assertEqual(sampling.max(), 19999)
        self.assertEqual(sampling.sum(), 49886308)
        self.assertAlmostEqual(sampling.std(), 5780.85909618678)
def main():
    """
    Generate a sampling from the module-level settings, and store it.

    Reads SEED, PROTOCOLE, SIZE, RATIO, PLOT, FNAME and DWELL.
    When SEED is None, a seed is derived from the current time and stored
    back into SEED, so that write() can report it in the file headers.
    If PLOT is True, the result is displayed before the files are written.

    Returns the sampling (sequence of sampled indices).
    Exits the program with status 1 when PROTOCOLE is unknown.
    """
    global SEED
    if SEED is None:
        SEED = int(100*time.time()) % 4294967200
        # NOTE(review): SEED remains set afterwards, so a second call to main()
        # in the same session reuses this seed - reset SEED to None to redraw
    np.random.seed(SEED)
    if PROTOCOLE == "random":
        sampling = random_gap(SIZE, ratio=RATIO)
    elif PROTOCOLE == "poisson":
        sampling = poisson_gap(SIZE, ratio=RATIO)
    elif PROTOCOLE == "uniform":
        # NOTE(review): uniform_gap is not defined in the visible part of this file - confirm it exists
        sampling = uniform_gap(SIZE, ratio=RATIO)
    else:
        print( "UNKNOWN PROTOCOLE")
        sys.exit(1)     # this is an error : do not report success to the caller
    print( "generated {0} {1}ly sampled points out of {2} contiguous points.".format(len(sampling), PROTOCOLE, SIZE) )
    print( "from {0} to {1}".format(sampling[0], sampling[-1]))
    if PLOT:
        plotit2(SIZE, sampling)
        plt.show()      # returns when the display is closed
    write(FNAME, sampling, DWELL, SIZE, RATIO, PROTOCOLE)
    print( "result written to file {0}.list and {0}.delay".format(FNAME))
    return sampling
def get_val(prompt, defval):
    """
    A typed version of input(), with a default value.

    prompt : text displayed to the user, followed by the default value
    defval : value used when the answer is empty or cannot be evaluated

    When defval is a string, the answer is returned as typed (defval if empty).
    Otherwise the answer is evaluated as a python expression; an empty or
    malformed answer, or one using an unknown name, falls back to defval.

    The retained value is printed, then returned.
    """
    question = "%s, (%s) " % (prompt, defval)
    # raw_input() only exists in python 2, where input() would evaluate the answer itself
    if sys.version_info[0] < 3:
        z = raw_input(question)
    else:
        z = input(question)
    if isinstance(defval, str):
        if z == "":
            z = defval
    else:
        try:
            # SECURITY: eval() executes whatever is typed - acceptable only because
            # the answer comes from the person running the program interactively
            z = eval(z)
        except (SyntaxError, NameError):
            # empty or malformed answer, or plain text given to a non-string prompt
            z = defval
    print(z)
    return z
def parse_arg(argv=None):
    """
    Read the argument line and act accordingly.

    argv : list of arguments, program name first; sys.argv is used when empty or None

    - no argument : the parameters are asked interactively, then main() is run
    - help, HELP or ? : the module documentation is printed, and the program exits
    - anything else : a short hint is printed
    """
    args = argv if argv else sys.argv
    if len(args) == 1:
        print( 'try running "%s help" for documentation'%args[0])
        # NOTE(review): get_from_input is not defined in the visible part of this file - confirm it exists
        get_from_input()
        main()
        return
    if args[1] in ("help", "HELP", "?"):
        print( __doc__)
        sys.exit(0)
    print( 'unknown argument, try "%s help"'%args[0])
if __name__ == '__main__':
    # standalone runs display the result : PLOT has to be set before
    # parse_arg() runs main(), which is where it is read
    PLOT = True
    parse_arg()
    #unittest.main()