Commit 16715b56 authored by Elvira Clement's avatar Elvira Clement
Browse files

Adding slope screening code

parent 7fa2707b
# Credits
## Maintainers
* Clément Elvira <clement.elvira@centralesupelec.fr>
* Cédric Herzet <cedric.herzet@inria.fr>
## Contributors
None yet. Why not be the first? See: CONTRIBUTING.md
# Contributing
Contributions are welcome, and they are greatly appreciated! Every
little bit helps, and credit will always be given.
## Report Bugs
Report bugs at https://gitlab-research.centralesupelec.fr/2020elvirac/slope-screening/-/issues.
If you are reporting a bug, please include:
* Any details about your local setup that might be helpful in troubleshooting.
* Detailed steps to reproduce the bug.
## Submit Feedback
The best way to send feedback is to file an issue at https://gitlab-research.centralesupelec.fr/2020elvirac/slope-screening/-/issues.
If you are proposing a feature:
* Explain in detail how it would work.
* Keep the scope as narrow as possible, to make it easier to implement.
* Remember that this is a volunteer-driven project, and that contributions
are welcome :)
coverage==5.5
cycler==0.10.0
Cython==0.29.24
joblib==1.0.1
kiwisolver==1.3.2
matplotlib==3.4.3
numpy==1.21.2
Pillow==8.3.2
pybind11==2.7.1
pyparsing==2.4.7
python-dateutil==2.8.2
scikit-learn==0.24.2
scipy==1.7.1
six==1.16.0
sklearn==0.0
threadpoolctl==2.2.0
# -*- coding: utf-8 -*-
from setuptools import setup, find_packages
from Cython.Build import cythonize

from src import __version__

# Read the long description and license text from the repository.
# BUG FIX: encoding is now set explicitly so the build does not depend on
# the platform's default locale (these files contain non-ASCII characters,
# e.g. the authors' names).
with open('README.md', encoding='utf-8') as f:
    readme = f.read()

# Renamed from `license` to avoid shadowing the builtin of the same name.
with open('LICENSE', encoding='utf-8') as f:
    license_text = f.read()

setup(
    name='slope-screening',
    version=__version__,
    description='Safe screening rules for the SLOPE problem',
    long_description=readme,
    author='Clément Elvira and Cédric Herzet',
    author_email='clement.elvira@centralesupelec.fr',
    url='',
    license=license_text,
    packages=find_packages(exclude=('tests', 'docs')),
    # Cython extensions implementing the screening tests.
    ext_modules=cythonize([
        "src/screening/utils.pyx",
        "src/screening/gap_ptest.pyx",
        "src/screening/gap_rqtest.pyx",
        "src/screening/kappa_test.pyx",
    ]),
)
# -*- coding: utf-8 -*-
from src.__version import __version__
\ No newline at end of file
# -*- coding: utf-8 -*-

# Single source of truth for the package version; re-exported by
# src/__init__.py and read by setup.py at build time.
__version__ = "0.9.0"
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy as np
import scipy.fftpack
def generate_dic(str_dic, m, n, bnormalized=True):
    """Generate a dictionary (design) matrix.

    Parameters
    ----------
    str_dic : str
        str representing the type of dictionary;
        one of ['gaussian', 'uniform', 'dct', 'toeplitz', 'pulse'].
        For 'gaussian', an optional correlation parameter may follow,
        e.g. "gaussian 0.5".
    m : int
        number of rows
    n : int
        number of columns
    bnormalized : bool
        True if columns are scaled to unit Euclidean norm

    Returns
    -------
    matA : np.ndarray
        dictionary matrix, size [m, n]
        (intended to be in 'Fortran' order / column major — confirm
        for every dictionary type)

    Raises
    ------
    ValueError
        if ``str_dic`` does not match any known dictionary type
    """
    if 'gaussian' in str_dic:
        # Optional correlation parameter after a space, e.g. "gaussian 0.5".
        sp = str_dic.split(' ')
        rho = float(sp[1]) if len(sp) > 1 else 0
        matA = _gaussian(m, n, rho)
    elif str_dic == "uniform":
        matA = _uniform(m, n)
    elif str_dic == "dct":
        matA = _dct(m, n)
    elif str_dic == "toeplitz":
        matA = _toeplitz(m, n)
    elif str_dic == "pulse":
        # Defined in Tropp's paper
        matA = _pulse(m, n)
    else:
        # BUG FIX: the original `raise "not recognized"` is invalid in
        # Python 3 (string exceptions raise TypeError); raise a proper,
        # informative exception instead.
        raise ValueError(f"dictionary type {str_dic!r} not recognized")

    if bnormalized:
        # Scale each column to unit l2-norm.
        for j in range(n):
            matA[:, j] /= np.linalg.norm(matA[:, j], 2)

    return matA
def _gaussian(m, n, rho):
"""Generate dictionaries whose column are
multivariate Gaussian with covariance matrix
\\Sigma satisfying $\\Sigma_{i,j} = 1$ if $i=j$ and $rho$ otherwise
Parameters
----------
m : int
number of rows
n : int
number of columns
corr : positive float
non-diagonal entries of the correlation matrix
Returns
-------
matA : np.ndarray
matrix dictionary in 'Fortran order' (column major)
"""
matCov = np.zeros((m, m))
for i in range(m):
for j in range(m):
matCov[i, j] = .7**(np.abs(i - j))
matA = np.random.multivariate_normal(np.zeros(m), matCov, n).T
return matA
def _uniform(m, n):
# -- generate data
matA = np.random.rand(n, m).T
return matA
def _dct(m, n):
indices = np.random.permutation(n)
indices = indices[:m]
# Coding matrix known to have good properties
matA = np.copy(scipy.fftpack.dct(np.eye(n)))
matA = matA[indices, :]
return np.copy(matA, order='F')
def _toeplitz(m, n):
gauss = lambda t: np.exp(-.5 * (t**2))
ranget = np.linspace(-10, 10, m)
offset = 3.
rangemu = np.linspace(np.min(ranget)+offset, np.max(ranget)-offset, n)
matA = np.zeros((n, m)).T
for j in range(n):
matA[:, j] = gauss(ranget - rangemu[j])
return matA
def _pulse(m, n):
matA = np.zeros((n, m)).T
if __name__ == '__main__':
    # Sanity check: every generated dictionary should be Fortran-ordered.
    m = 10
    n = 15
    list_dic = ["gaussian", "uniform", "dct", "toeplitz"]
    for str_dic in list_dic:
        print(f"-- {str_dic} --")
        matA = generate_dic(str_dic, m, n, True)
        # BUG FIX: typo in the printed label ("fotran" -> "fortran").
        print(f"is fortran: {np.isfortran(matA)}")
        print("\n")
# -*- coding: utf-8 -*-
\ No newline at end of file
# -*- coding: utf-8 -*-
from abc import ABC, abstractmethod
import numpy as np
class AbstractGapScreening(ABC):
    """Common interface for GAP-sphere screening tests.

    Concrete subclasses implement :meth:`apply_test`; the attribute
    ``name`` holds a human-readable identifier returned by
    :meth:`get_name`.
    """

    def __init__(self):
        super().__init__()
        # Identifier displayed by get_name(); subclasses may override it.
        self.name = "NONAME"

    def get_name(self):
        """Return the human-readable name of the screening test."""
        return self.name

    @abstractmethod
    def apply_test(self, Atu_abs, gap, lbd, vec_gammas, offset_radius=0, index=None) -> np.ndarray:
        """GAP sphere screening test detailed in Cédric's note.

        Parameters
        ----------
        Atu_abs : np.ndarray
            vector |matA.T @ vecu| where vecu is dual admissible
            size [n]
        gap : positive float
            duality gap
        lbd : positive float
            regularization parameter
        vec_gammas : np.ndarray
            slope parameters
            size [n,]
        offset_radius : float
            additive term added to the radius
        index : np.ndarray
            Array of indices that sort Atu in absolute value
            (unused here)
            default is None

        Returns
        -------
        calI_screen : np.ndarray
            vector of booleans, size [n,];
            True if the screening test passes and False otherwise
        """
        pass
\ No newline at end of file
This diff is collapsed.
# -*- coding: utf-8 -*-
# Based on https://cython.readthedocs.io/en/latest/src/tutorial/numpy.html
# https://cython.readthedocs.io/en/latest/src/tutorial/memory_allocation.html
import numpy as np
from src.screening.ascreening import AbstractGapScreening
# "cimport" is used to import special compile-time information
# about the numpy module (this is stored in a file numpy.pxd which is
# currently part of the Cython distribution).
cimport numpy as np
# from libcpp cimport bool as bool_t
cimport cython
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
ctypedef np.npy_bool bool_t
#(AbstractGapScreening)
cdef class GAP_Ptest:
    """Gap safe sphere screening test for SLOPE (the "p-test").

    The constructor precomputes the cumulative sums of the SLOPE weights
    into a C buffer; the test itself is applied via :meth:`apply_test`.
    """

    # C buffer of length n+1 holding [0, g1, g1+g2, ...] so that the
    # paper's 1-based partial sums can be read off directly.
    cdef double *vec_cumsum_gammas

    @cython.boundscheck(False) # turn off bounds-checking for entire function
    @cython.wraparound(False) # turn off negative index wrapping for entire function
    def __init__(
        self, np.ndarray[np.npy_double, ndim=1] vec_gammas
    ):
        # Precompute cumulative sums of the SLOPE weights.
        # NOTE(review): the PyMem_Malloc return value is not checked
        # against NULL — confirm allocation failure is acceptable here.
        cdef int n = vec_gammas.shape[0]
        cdef int i
        self.vec_cumsum_gammas = <double*> PyMem_Malloc((n+1) * sizeof(double))
        self.vec_cumsum_gammas[0] = 0
        for i in range(n):
            self.vec_cumsum_gammas[i+1] = self.vec_cumsum_gammas[i] + vec_gammas[i]

        # Adding zero at the beginning
        # cdef np.ndarray[np.npy_double, ndim=1] buf = np.zeros(vec_cumsum_gammas.shape[0]+1)
        # buf[1:] = vec_cumsum_gammas
        # self.vec_cumsum_gammas = &buf[0]

    def __dealloc__(self):
        # Release the C buffer allocated in __init__.
        PyMem_Free(self.vec_cumsum_gammas) # no-op if self.data is NULL

    def get_name(self):
        # Human-readable identifier of this screening test.
        return "p-test cython"

    @cython.boundscheck(False) # turn off bounds-checking for entire function
    # @cython.wraparound(False) # turn off negative index wrapping for entire function
    def apply_test(self,
        np.ndarray[np.npy_double, ndim=1] Atcabs,
        double gap,
        double lbd,
        np.ndarray[np.npy_double, ndim=1] vec_gammas,
        double offset_radius=0.,
        np.ndarray[long, ndim=1] index=None
    ):
        """ Apply the Gap safe sphere screening test
        Implementation of the safe screening test
        Parameters
        ----------
        Atcabs : np.ndarray
            vector matA.T @ vecc where vec is dual admissible in absolute value
            size [n]
        gap : positive float
            duality gap
        lbd : positive float
            regularization parameters
        vec_gammas : np.ndarray
            slope parameters
            size [n,]
        offset_radius : float
            additive term added to the radius
            default is 0
        index : np.ndarray
            Array of indices that sort Atu in absolute value
            default is None
        Returns
        -------
        calI_screen : np.ndarray
            vector of boolean
            True if screening test passes and False otherwise
            size [n,]
        """
        # NOTE(review): self.vec_cumsum_gammas is never read in this
        # method; the partial sums are recomputed from vec_gammas via
        # np.cumsum below — confirm whether the precomputed buffer was
        # meant to be used here.
        # !!! Important remark !!!
        # all indexes below range from 1 to n (instead of 0 to n-1)
        # in order to match the paper indexation rules
        cdef double radius = np.sqrt(2 * gap) + offset_radius
        cdef int n = Atcabs.shape[0]
        # NOTE(review): 'r' is declared but unused; the loop variables
        # 'l' and 'p' below are untyped (Python objects) — presumably a
        # minor performance oversight.
        cdef int k, q, r
        cdef double tau = 0
        # arange because I want entries 0, 1 being 0 and 1!
        cdef np.ndarray[np.int_t, ndim=1] vec_p_star = np.arange(n+1, dtype=long)
        cdef np.ndarray[np.int_t, ndim=1] vec_q_star = np.arange(n+1, dtype=long)
        # output
        cdef np.ndarray[np.npy_bool, ndim=1] calI_screen = np.zeros(n, dtype=bool)

        # 1. Sort in descending order
        if index is None:
            index = np.argsort(Atcabs)[::-1]

        # 2. Precomputing quantities
        # vec_f[k-1] = sum_{j=k..n} (lbd*gamma_j - |A^T c|_[j] - radius):
        # a cumulative sum over the reversed tail, read back in forward order.
        cdef np.ndarray[np.npy_double, ndim=1] vec_f = np.cumsum(
            lbd * vec_gammas[::-1] - Atcabs[index[::-1]] - radius
        )[::-1]

        # Running-argmax tables: vec_p_star[k] (resp. vec_q_star[k]) is the
        # index in {1..k} maximizing vec_f (resp. vec_f - lbd*gamma).
        cdef double best_bound_q = vec_f[0] - lbd * vec_gammas[0]
        cdef double curr_bound_q = 0.
        cdef double best_bound_p = vec_f[0]
        cdef double curr_bound_p = 0.
        for k in range(2, n+1):
            #+1 is to match paper index
            # 1. Evaluate p*
            curr_bound_p = vec_f[k-1]
            if curr_bound_p > best_bound_p:
                best_bound_p = curr_bound_p
                vec_p_star[k] = k
            else:
                vec_p_star[k] = vec_p_star[k-1]

            # 1. Evaluate q*
            curr_bound_q = vec_f[k-1] - lbd * vec_gammas[k-1]
            if curr_bound_q > best_bound_q:
                best_bound_q = curr_bound_q
                vec_q_star[k] = k
            else:
                vec_q_star[k] = vec_q_star[k-1]

        # 3. Tests
        # Walk the ranks from the smallest entry (l = n) upward; the first
        # rank whose threshold test fires determines the screened set
        # index[l:], i.e. the entries ranked strictly below l.
        for l in range(n, 0, -1):
            q = vec_q_star[l]
            while q >= 1:
                # Implicit definition of the best value of p given q
                p = vec_p_star[q]

                # Evaluation of the threshold
                tau = vec_f[p-1] - vec_f[q-1] + (lbd * vec_gammas[q-1] - radius)

                # Test
                if Atcabs[index[l-1]] >= tau:
                    calI_screen[index[l:]] = True
                    return calI_screen

                # Next critical point
                q = vec_q_star[p-1]

        # Fallback: no test fired at any rank; l equals 1 here, so all
        # entries but the largest one are marked.
        calI_screen[index[l:]] = True
        return calI_screen
\ No newline at end of file
This diff is collapsed.
# -*- coding: utf-8 -*-
# Based on https://cython.readthedocs.io/en/latest/src/tutorial/numpy.html
# https://cython.readthedocs.io/en/latest/src/tutorial/memory_allocation.html
import numpy as np
from src.screening.ascreening import AbstractGapScreening
# "cimport" is used to import special compile-time information
# about the numpy module (this is stored in a file numpy.pxd which is
# currently part of the Cython distribution).
cimport numpy as np
# from libcpp cimport bool as bool_t
cimport cython
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
ctypedef np.npy_bool bool_t
#(AbstractGapScreening)
cdef class GAP_RQtest:
""" Generalized test
"""
cdef double *vec_cumsum_gammas
cdef long *vec_rq
    @cython.boundscheck(False) # turn off bounds-checking for entire function
    @cython.wraparound(False) # turn off negative index wrapping for entire function
    def __init__(
        self,
        np.ndarray[np.npy_double, ndim=1] vec_gammas,
        np.ndarray[long, ndim=1] vec_rq,
    ):
        # Copy the r_q indices and precompute cumulative sums of the SLOPE
        # weights into C buffers: vec_cumsum_gammas[k] = gamma_1+...+gamma_k,
        # with a leading 0 to match the paper's 1-based indexing.
        # NOTE(review): PyMem_Malloc return values are not checked for NULL.
        cdef int n = vec_gammas.shape[0]
        cdef int i
        self.vec_cumsum_gammas = <double*> PyMem_Malloc((n+1) * sizeof(double))
        self.vec_cumsum_gammas[0] = 0
        self.vec_rq = <long*> PyMem_Malloc(n * sizeof(long))
        for i in range(n):
            self.vec_cumsum_gammas[i+1] = self.vec_cumsum_gammas[i] + vec_gammas[i]
            self.vec_rq[i] = vec_rq[i]
    def __dealloc__(self):
        # Release both C buffers allocated in __init__.
        PyMem_Free(self.vec_cumsum_gammas) # no-op if self.data is NULL
        PyMem_Free(self.vec_rq)
def get_name(self):
return "p-test cython"
@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False) # turn off negative index wrapping for entire function
def apply_test(self,
np.ndarray[np.npy_double, ndim=1] Atcabs,
double gap,
double lbd,
np.ndarray[np.npy_double, ndim=1] vec_gammas,
double offset_radius=0.,
np.ndarray[long, ndim=1] index=None
):
""" Apply the Gap safe sphere screening test
Implementation of the safe screening test
Parameters
----------
Atcabs : np.ndarray
vector matA.T @ vecc where vec is dual admissible in absolute value
size [n]
gap : positive float
duality gap
lbd : positive float
regularization parameters
vec_gammas : np.ndarray
slope parameters
size [n,]
offset_radius : float
additive term added to the redius
default is 0
index : np.ndarray
Array of indices that sort Atu in absolute value
default is None
Returns
-------
calI_screen : np.ndarray
vector of boolean
True if screening test passes and False otherwise
size [n,]
"""
cdef double radius = np.sqrt(2 * gap) + offset_radius
cdef int n = Atcabs.shape[0]
cdef int k, l, q, r_q
cdef double lhs
# right-hand side
cdef np.ndarray[np.npy_double, ndim=1] vec_kappaqr
vec_kappaqr = np.zeros(n+1, dtype=np.double)
for q in range(1, n+1):
r_q = self.vec_rq[q-1]
vec_kappaqr[q] += lbd * (self.vec_cumsum_gammas[q] - self.vec_cumsum_gammas[r_q-1])
vec_kappaqr[q] -= (q - r_q + 1) * radius
# output
cdef np.ndarray[np.npy_bool, ndim=1] calI_screen = np.ones(n, dtype=bool)
# 1. Sort in descending order
if index is None:
index = np.argsort(Atcabs)[::-1]
cdef np.ndarray[np.npy_double, ndim=1] vec_cumsumAtcabs = np.zeros(n+1, dtype=np.double)
vec_cumsumAtcabs[1:] = np.cumsum(Atcabs[index])
for l in range(n):
index_l = index[l]
for q in range(1, n+1): # Index are shifted by 1
r_q = self.vec_rq[q-1]
# lhs = 0
if r_q >= q:
lhs = Atcabs[index_l]
# elif index_l < index[r_q-1] or index_l >= index[q-1]:
elif l < r_q or l > q-1:
#In this calse |A_{\l}c|_[k] =|Atc|_[k] for the range of k
lhs = Atcabs[index_l] + vec_cumsumAtcabs[q-1] - vec_cumsumAtcabs[r_q-2]
# for k in range(r_q-1, q-1):
# lhs += Atcabs[index[k]]
else:
# print("ici else")
lhs = vec_cumsumAtcabs[q] - vec_cumsumAtcabs[r_q-2]
# for k in range(r_q-1, q):
# lhs += Atcabs[index[k]]
if lhs >= vec_kappaqr[q]:
calI_screen[index_l] = False
<