# test if the list contains a number to some extent

Let's say I have a list L=[1.1, 1.8, 4.4, 5.2]. For some integer, n, I want to know whether L has a value val with n-1<val<n+1, and if so I want to know the index of val.

The best I can do so far is to define a generator

x = (index for index,val in enumerate(L) if n-1<val<n+1)

and check to see whether it has an appropriate value using try... except. So let's assume I'm looking for the smallest n>=0 for which such a value exists...

L=[1.1, 1.8, 4.4, 5.2]
n=0
while True:
x = (index for index,val in enumerate(L) if n-1<val<n+1)
try:
index=next(x)
break
except StopIteration:
n+=1
print n,index

1 0

In reality, I'm doing a more complicated task. I'll want to be able to take an n, find the first index, and if it doesn't exist, I need to do something else.

This doesn't seem like particularly clean code to me. Is there a better way? I feel like numpy probably has the answer, but I don't know it well enough.

If L is sorted, you could use bisect.bisect_left to find the index i for which all L[< i] < n <= all L[>= i].

Then

if n - L[i-1] < 1.0:
val = L[i-1]
elif L[i] - n < 1.0:
val = L[i]
else:
val = None     # no such value found

Edit: Depending on your data, what you want to accomplish, and how much time you want to spend writing a clever algorithm, sorting may or may not be a good solution for you; and before I see too many more O(n)s waved around, I would like to point out that his actual problem seems to involve repeatedly probing for various values of n - which would pretty rapidly amortize the initial sorting overhead - and that his suggested algorithm above is actually O(n**2).

@AntoinePelisse: by all means, let's do some profiling:

from bisect import bisect_left, bisect_right
from functools import partial
import matplotlib.pyplot as plt
from random import randint, uniform
from timeit import timeit

#blues
density_col_lin = [
(0.000, 0.502, 0.000, 1.000),
(0.176, 0.176, 0.600, 1.000),
(0.357, 0.357, 0.698, 1.000),
(0.537, 0.537, 0.800, 1.000)
]

# greens
density_col_sor = [
(0.000, 0.502, 0.000, 1.000),
(0.176, 0.600, 0.176, 1.000),
(0.357, 0.698, 0.357, 1.000),
(0.537, 0.800, 0.537, 1.000)
]

def make_data(length, density):
max_ = length / density
return [uniform(0.0, max_) for _ in range(length)], max_

def linear_probe(L, max_, probes):
for p in range(probes):
n = randint(0, int(max_))
for index,val in enumerate(L):
if n - 1.0 < val < n + 1.0:
# return index
break

def sorted_probe(L, max_, probes):
# initial sort
sL = sorted((val,index) for index,val in enumerate(L))
for p in range(probes):
n = randint(0, int(max_))
left  = bisect_right(sL, (n - 1.0, max_))
right = bisect_left (sL, (n + 1.0, 0.0 ), left)
if left < right:
index = min(sL[left:right], key=lambda s:s[1])[1]
# return index

def main():
densities = [0.8, 0.2, 0.08, 0.02]
probes    = [1, 3, 10, 30, 100]
lengths   = [[]                   for d in densities]
lin_pts   = [[[] for p in probes] for d in densities]
sor_pts   = [[[] for p in probes] for d in densities]

# time each function at various data lengths, densities, and probe repetitions
for d,density in enumerate(densities):
for trial in range(200):
print("{}-{}".format(density, trial))

# length in 10 to 5000, with log density
length = int(10 ** uniform(1.0, 3.699))
L, max_ = make_data(length, density)
lengths[d].append(length)

for p,probe in enumerate(probes):
lin = timeit(partial(linear_probe, L, max_, probe), number=5) / 5
sor = timeit(partial(sorted_probe, L, max_, probe), number=5) / 5
lin_pts[d][p].append(lin / probe)
sor_pts[d][p].append(sor / probe)

# plot the results
plt.figure(figsize=(9.,6.))
plt.axis([0, 5000, 0, 0.004])

for d,density in enumerate(densities):
xs = lengths[d]
lcol = density_col_lin[d]
scol = density_col_sor[d]

for p,probe in enumerate(probes):
plt.plot(xs, lin_pts[d][p], "o", color=lcol, markersize=4.0)
plt.plot(xs, sor_pts[d][p], "o", color=scol, markersize=4.0)

plt.show()

if __name__ == "__main__":
main()

which results in

x-axis is number of items in L, y-axis is amortized time per probe; green dots are sorted_probe(), blue are linear_probe().

Conclusions:

• runtimes for both functions are remarkably linear with respect to length
• for a single probe into L, presorting is about 4 times slower than iterating
• the crossover point seems to be about 5 probes; for fewer than that, linear search is faster, for more, presorting is faster.