Самая длинная последовательность подряд идущих чисел в несортированном массиве

Дан массив чисел, расположенных в произвольном (несортированном) порядке. Нужно найти самую длинную последовательность подряд идущих целых чисел, содержащихся в массиве. Обратите внимание: элементы этой последовательности не обязаны стоять в массиве по порядку или рядом друг с другом. Вот пример:

Вход:

A[] = {10,21,45,22,7,2,67,19,13,45,12,11,18,16,17,100,201,20,101}

Выход:

{16,17,18,19,20,21,22}

Решение должно иметь сложность O (n).

Мне сказали, что решение использует хеш-таблицу, и я встречал несколько реализаций, в которых применяются две хеш-таблицы. Просто отсортировать массив и решить задачу нельзя: сортировка требует O(n log n), что не удовлетворяет условию.

Ответ 1

Вот решение на Python, которое использует только одно хеш-множество и обходится без хитроумного слияния интервалов.

def destruct_directed_run(num_set, start, direction):
  """Strip one directed run of consecutive integers out of ``num_set``.

  Walks from ``start`` in steps of ``direction``; every visited value that is
  a member of ``num_set`` is removed from it (the set is mutated in place).

  Returns the first value on that walk that was not in the set.
  """
  cursor = start
  while True:
    if cursor not in num_set:
      return cursor
    num_set.remove(cursor)
    cursor += direction

def destruct_single_run(num_set):
  """Remove one complete run of consecutive integers from ``num_set``.

  An arbitrary member is picked, and the run containing it is consumed
  downwards and then upwards with destruct_directed_run, so ``num_set`` is
  mutated in place.

  Returns ``range(first, last + 1)`` covering the removed run (a list on
  Python 2, a range object on Python 3).
  Raises StopIteration when ``num_set`` is empty.
  """
  # next(iter(...)) works on Python 2.6+ and Python 3; the iterator's .next()
  # method only exists on Python 2.
  arbitrary_member = next(iter(num_set))
  # The downward walk also removes arbitrary_member itself, so the upward
  # walk has to begin one past it.
  bottom = destruct_directed_run(num_set, arbitrary_member, -1)
  top = destruct_directed_run(num_set, arbitrary_member + 1, 1)
  # bottom and top are the first values *outside* the run on each side.
  return range(bottom + 1, top)

def max_run(data_set):
  """Return the longest run of consecutive integers found in ``data_set``.

  The input is copied into a set, then runs are peeled off one at a time
  with destruct_single_run until the set is empty. The first run of maximal
  length encountered is returned; an empty input yields ``[]``.
  """
  remaining = set(data_set)
  longest = []
  while len(remaining) > 0:
    candidate = destruct_single_run(remaining)
    if len(candidate) <= len(longest):
      continue
    longest = candidate
  return longest

def test_max_run(data_set, expected):
  """Run max_run on ``data_set`` and print a one-line report.

  The line contains the input, the actual result, the expected result and
  'Pass' or 'Fail'. Nothing is returned.
  """
  actual = max_run(data_set)
  verdict = 'Pass' if expected == actual else 'Fail'
  # One pre-formatted argument gives the same output under the Python 2
  # print statement and the Python 3 print() function.
  print('%s %s %s %s' % (data_set, actual, expected, verdict))

# test_max_run prints its own report and returns None, so it is called bare;
# wrapping the call in print would emit an extra "None" line.
test_max_run([10,21,45,22,7,2,67,19,13,45,12,11,18,16,17,100,201,20,101], range(16, 23))
test_max_run([1,2,3], range(1, 4))
# Every run here has length 1, so whichever singleton comes back is acceptable.
print('%s %s' % (max_run([1,3,5]), 'any singleton output fine'))

Ответ 2

У вас может быть две таблицы:

Начальная таблица: (начальная точка, длина)
Конечная таблица: (конечная точка, длина)

При добавлении нового элемента вы должны проверить:

Существует ли значение + 1 в начальной таблице? Если да, удалите эту запись и создайте новую запись (значение, длина + 1), где «длина» — текущая длина найденной последовательности. Также обновите конечную таблицу: конечная точка остаётся той же, но длина увеличивается.
Существует ли значение - 1 в конечной таблице? Если да, удалите эту запись и создайте новую запись (значение, длина + 1), и на этот раз обновите начальную таблицу (начальная точка остаётся той же, но длина увеличивается).

Если выполнены оба условия, вы фактически сшиваете две существующие последовательности: замените четыре существующие записи двумя новыми, описывающими одну более длинную последовательность.

Если ни одно из условий не выполняется, вы просто создаете новую запись длиной 1 в обеих таблицах.

После того как все значения добавлены, достаточно перебрать начальную таблицу и найти ключ с наибольшим значением (длиной).

Думаю, это сработает и будет иметь сложность O(n), если считать, что поиск, вставка и удаление в хеш-таблице выполняются за O(1).

EDIT: реализация на C#. Потребовалось некоторое время, чтобы довести её до ума, но, думаю, она работает :)

using System;
using System.Collections.Generic;

class Test
{
    /// <summary>
    /// Groups the sample input into maximal runs of consecutive integers using
    /// two dictionaries, then prints the start and length of every run.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        int[] input = {10,21,45,22,7,2,67,19,13,45,12,
                11,18,16,17,100,201,20,101};

        // starts[s] is the length of the run that begins at s;
        // ends[e] is the length of the run that finishes at e.
        Dictionary<int, int> starts = new Dictionary<int, int>();
        Dictionary<int, int> ends = new Dictionary<int, int>();
        // Values already merged into a run. A repeated value must be skipped:
        // re-inserting it would register a bogus length-1 run that can later
        // overwrite the entry of a longer run (e.g. input 1,2,3,2,1).
        HashSet<int> seen = new HashSet<int>();

        foreach (var value in input)
        {
            if (!seen.Add(value))
            {
                continue;
            }

            int startLength;
            int endLength;
            bool extendsStart = starts.TryGetValue(value + 1,
                                                   out startLength);
            bool extendsEnd = ends.TryGetValue(value - 1,
                                               out endLength);

            // Stitch together two sequences
            if (extendsStart && extendsEnd)
            {
                // value + 1 stops being a start and value - 1 stops being an
                // end; drop exactly those two entries so no stale run is left
                // behind inside the merged one.
                starts.Remove(value + 1);
                ends.Remove(value - 1);
                int start = value - endLength;
                int newLength = startLength + endLength + 1;
                starts[start] = newLength;
                ends[start + newLength - 1] = newLength;
            }
            // Value just comes before an existing sequence
            else if (extendsStart)
            {
                int newLength = startLength + 1;
                starts.Remove(value + 1);
                starts[value] = newLength;
                ends[value + newLength - 1] = newLength;
            }
            // Value just comes after an existing sequence
            else if (extendsEnd)
            {
                int newLength = endLength + 1;
                ends.Remove(value - 1);
                starts[value - newLength + 1] = newLength;
                ends[value] = newLength;
            }
            // Isolated value: a new run of length 1
            else
            {
                starts[value] = 1;
                ends[value] = 1;
            }
        }

        // Just for diagnostics - could actually pick the longest
        // in O(n)
        foreach (var sequence in starts)
        {
            Console.WriteLine("Start: {0}; Length: {1}",
                              sequence.Key, sequence.Value);
        }
    }
}

EDIT: вот реализация варианта с одним хеш-множеством, тоже на C#. Согласен, она проще приведённой выше, но исходный ответ оставляю для истории:

using System;
using System.Collections.Generic;
using System.Linq;

class Test
{
    /// <summary>
    /// Finds the longest run of consecutive integers in the sample input with a
    /// single hash set and prints where it starts and how long it is.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        int[] input = {10,21,45,22,7,2,67,19,13,45,12,
                11,18,16,17,100,201,20,101};

        HashSet<int> values = new HashSet<int>(input);

        int bestLength = 0;
        int bestStart = 0;
        // Walk the array rather than repeatedly asking the shrinking set for
        // its first element: each such request starts a fresh enumeration and
        // may have to skip slots vacated by earlier removals, so the loop as a
        // whole would not be guaranteed linear. Remove doubles as the test for
        // "not consumed by an earlier run" (and skips duplicates).
        foreach (int value in input)
        {
            if (!values.Remove(value))
            {
                continue;
            }

            // Consume the neighbours below the seed value...
            int start = value;
            while (values.Remove(start - 1))
            {
                start--;
            }
            // ...and the neighbours above it.
            int end = value;
            while (values.Remove(end + 1))
            {
                end++;
            }

            int length = end - start + 1;
            if (length > bestLength)
            {
                bestLength = length;
                bestStart = start;
            }
        }
        Console.WriteLine("Best sequence starts at {0}; length {1}",
                          bestStart, bestLength);
    }
}

Ответ 3

Поместите все элементы в хеш-множество.

Теперь обходите это множество. Для каждого элемента ищите в множестве все значения, соседние с текущим. Отслеживайте самую длинную найденную последовательность, удаляя найденные элементы из множества. Храните счётчик длины для сравнения.

Повторяйте, пока множество не опустеет.

Если поиск, вставка и удаление выполняются за O(1), этот алгоритм работает за O(N).

Псевдокод:

 int start, end, max = 0
 int temp_start, temp_end, count

 hashset numbers

 for element in array:
     numbers.add(element)

 while !numbers.empty():
     // take any element as the seed of a run and consume it right away,
     // otherwise the set never becomes empty
     number = numbers[0]
     numbers.remove(number)
     count = 1
     temp_start = number
     temp_end = number

     // extend the run downwards, moving the lower bound one step at a time
     while numbers.contains(temp_start - 1):
         temp_start = temp_start - 1; count++
         numbers.remove(temp_start)

     // extend the run upwards, moving the upper bound one step at a time
     while numbers.contains(temp_end + 1):
         temp_end = temp_end + 1; count++
         numbers.remove(temp_end)

     if max < count:
         max = count
         start = temp_start; end = temp_end

 // start and end are both part of the longest run (inclusive bounds)
 max_range = range(start, end)

Вложенные циклы while выглядят некрасиво, но каждое число обрабатывается только один раз, поэтому сложность должна быть O(N).

Ответ 4

Другое решение - с хэш-поиском, который выполняется в O (n)

// Finds the longest run of consecutive values in a[0..N-1].
// NOTE(review): hash_search(x) is presumably a membership test against a hash
// set pre-filled with every a[i] -- confirm against its definition.
// After the loop maxCount holds the run length and a[maxIndex] is its first value.
int maxCount = 0;
for (i = 0; i<N; i++) 
{ 
    // Search whether a[i] - 1 is present in the list. If it is present, 
    // a[i] is not the first element of its run, so skip it: the run will be
    // counted when its first element is traversed.
    if (hash_search(a[i]-1))
        continue;

    // a[i] starts a run. The run contains at least a[i] itself, so counting
    // restarts from 1 for every run instead of carrying over the previous one.
    count = 1;
    num = a[i]; 
    while (hash_search(++num)) 
        count++;

    // Remember the run if it is longer than the best one seen so far.
    if (count > maxCount)
    {
        maxIndex = i;
        maxCount = count;
    }
}

Ответ 5

Вот реализация:

/// <summary>
/// Returns the longest run of consecutive integers whose members all occur in
/// <paramref name="A"/>, listed in ascending order.
/// </summary>
/// <param name="A">Input values in any order; repeated values are allowed.</param>
/// <returns>
/// The values of the longest run, or an empty array when <paramref name="A"/>
/// is empty. When several runs share the maximal length, the one met first
/// while enumerating the internal table is returned.
/// </returns>
static int[] F(int[] A)
{
    // low[s] is the length of the run starting at s;
    // high[e] is the length of the run ending at e.
    Dictionary<int, int> low = new Dictionary<int, int>();
    Dictionary<int, int> high = new Dictionary<int, int>();
    // Values already placed in a run. Without this check a repeated value is
    // registered as a fresh length-1 run and can later overwrite the entry of a
    // longer run (e.g. {1,2,3,2,1} would yield {1,2}).
    HashSet<int> seen = new HashSet<int>();

    foreach (int a in A)
    {
        if (!seen.Add(a))
        {
            continue;
        }

        int lowLength, highLength;

        bool lowIn = low.TryGetValue(a + 1, out lowLength);
        bool highIn = high.TryGetValue(a - 1, out highLength);

        if (lowIn && highIn)
        {
            // a bridges the run ending at a - 1 and the run starting at a + 1.
            low.Remove(a + 1);
            high.Remove(a - 1);
            low[a - highLength] = high[a + lowLength] = lowLength + highLength + 1;
        }
        else if (lowIn)
        {
            // a becomes the new first element of the run that started at a + 1.
            low.Remove(a + 1);
            low[a] = high[a + lowLength] = lowLength + 1;
        }
        else if (highIn)
        {
            // a becomes the new last element of the run that ended at a - 1.
            high.Remove(a - 1);
            high[a] = low[a - highLength] = highLength + 1;
        }
        else
        {
            // Isolated value: a run of length 1.
            high[a] = low[a] = 1;
        }
    }

    // Pick the longest run from the table of run starts.
    int maxLow = 0, maxLength = 0;
    foreach (var pair in low)
    {
        if (pair.Value > maxLength)
        {
            maxLength = pair.Value;
            maxLow = pair.Key;
        }
    }

    // Materialise the run as maxLow, maxLow + 1, ...
    int[] ret = new int[maxLength];
    for (int i = 0; i < maxLength; i++)
    {
        ret[i] = maxLow + i;
    }

    return ret;
}

Ответ 6

class Solution {
public:
    // Closed interval [lower, higher] of consecutive values, stored per key in
    // the map built by longestConsecutive.
    struct Node {
        int lower;
        int higher;
        Node(int l, int h) : lower(l), higher(h) {}
    };

    // Returns the length of the longest run of consecutive integers whose
    // members all occur in num; 0 when num is empty.
    // std::map is an ordered container with logarithmic lookups, so this runs
    // in O(n log n) rather than O(n).
    // NOTE(review): lower - 1 / higher + 1 overflow if num contains INT_MIN or
    // INT_MAX; callers are assumed to stay away from those limits.
    int longestConsecutive(std::vector<int> &num) {
        std::map<int, Node> interval_map;
        std::map<int, Node>::iterator curr_iter, inc_iter, des_iter;

        // Pass 1: one single-point interval per distinct value. insert leaves
        // an existing key untouched, so duplicates need no separate lookup.
        for (std::size_t i = 0; i < num.size(); i++) {
            interval_map.insert(std::make_pair(num[i], Node(num[i], num[i])));
        }

        // Start at 0 so that an empty input reports length 0, not -1.
        int max = 0;

        // Pass 2: visit keys in ascending order and merge each interval with
        // the ones directly adjacent to it.
        for (curr_iter = interval_map.begin(); curr_iter != interval_map.end(); curr_iter++) {
            int lower = curr_iter->second.lower;
            int higher = curr_iter->second.higher;
            int newlower = lower, newhigher = higher;

            // Interval touching the current one from below, if any.
            des_iter = interval_map.find(lower - 1);
            if (des_iter != interval_map.end()) {
                curr_iter->second.lower = des_iter->second.lower;
                newlower = des_iter->second.lower;
            }

            // Interval touching the current one from above, if any.
            inc_iter = interval_map.find(higher + 1);
            if (inc_iter != interval_map.end()) {
                curr_iter->second.higher = inc_iter->second.higher;
                newhigher = inc_iter->second.higher;
            }

            // Propagate the merged bounds to both neighbours so that later
            // iterations see the widened interval.
            if (des_iter != interval_map.end()) {
                des_iter->second.higher = newhigher;
            }
            if (inc_iter != interval_map.end()) {
                inc_iter->second.lower = newlower;
            }

            int length = curr_iter->second.higher - curr_iter->second.lower + 1;
            if (length > max) {
                max = length;
            }
        }
        return max;
    }
};

Ответ 7

Это решение Григора Геворкяна из дубликата этого вопроса, но, на мой взгляд, в упрощённом виде:

data = [1,3,5,7,4,6,10,3]

# other_sides[x] == opposite end of the interval that has x as an endpoint.
# Entries for points that have become interior to an interval are left behind
# and are not kept up to date.
other_sides = {}
# set() eliminates duplicates, and is assumed to be an O(n) operation
for element in set(data):
    # The new left end is the left end of the interval finishing just before
    # this element, or the element itself when there is no such interval.
    left = other_sides.get(element - 1, element)

    # The new right end is the right end of the interval starting just after
    # this element, or the element itself when there is no such interval.
    right = other_sides.get(element + 1, element)

    # Satisfy the invariant: both endpoints point at each other.
    other_sides[left] = right
    other_sides[right] = left

# Convert the dictionary to (start, stop) segments. Each interval is recorded
# from both ends, so only the start <= stop half is kept. Leftover interior
# entries may also pass this filter, but they always describe a shorter piece
# of a real interval and therefore cannot win the max below.
segments = [(start, stop) for start, stop in other_sides.items() if start <= stop]
# Find the longest one. A single parenthesised argument prints identically on
# Python 2 and Python 3.
print(max(segments, key=lambda segment: segment[1] - segment[0]))

Ответ 8

Вот код на Python, основанный на ответе Григора Геворкяна на аналогичный вопрос; по-моему, это очень элегантное решение задачи.

l = [10,21,45,22,7,2,67,19,13,45,12,11,18,16,17,100,201,20,101]
# d[x] is None until x has been processed. For the two endpoints of a run,
# d[x] is the opposite endpoint; values left on interior points are not
# maintained once the run grows past them.
d = {x: None for x in l}
print(d)
# Iterate over a snapshot of the keys: only values are rewritten in the loop,
# and this form runs on both Python 2 and Python 3 (iteritems is Python 2 only).
for k in list(d):
    if d[k] is not None:
        continue
    # a: far (left) end of the run finishing at k - 1, if that run exists.
    # b: far (right) end of the run starting at k + 1, if that run exists.
    a, b = d.get(k - 1), d.get(k + 1)
    if a is not None and b is not None:
        # k bridges two runs: link their outer ends, mark k as processed.
        d[k], d[a], d[b] = k, b, a
    elif a is not None:
        # k extends the run on its left and becomes its new right end.
        d[a], d[k] = k, a
    elif b is not None:
        # k extends the run on its right and becomes its new left end.
        d[b], d[k] = k, b
    else:
        # Isolated value: a run consisting of k alone.
        d[k] = k
    print(d)

# The left end of the longest run maximises (right end - left end).
m = max(d, key=lambda x: d[x] - x)
print('%s %s' % (m, d[m]))

выход:

{2: 2, 67: None, 100: None, 101: None, 7: None, 201: None, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: None, 101: None, 7: None, 201: None, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 100, 101: None, 7: None, 201: None, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: None, 201: None, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: None, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: None, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 10, 11: None, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 11, 11: 10, 12: None, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 12, 11: 10, 12: 10, 45: None, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 12, 11: 10, 12: 10, 45: 45, 13: None, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: None, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 16, 17: None, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 17, 17: 16, 18: None, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 18, 17: 16, 18: 16, 19: None, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 19, 17: 16, 18: 16, 19: 16, 20: None, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 20, 17: 16, 18: 16, 19: 16, 20: 16, 21: None, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 21, 17: 16, 18: 16, 19: 16, 20: 16, 21: 16, 22: None}
{2: 2, 67: 67, 100: 101, 101: 100, 7: 7, 201: 201, 10: 13, 11: 10, 12: 10, 45: 45, 13: 10, 16: 22, 17: 16, 18: 16, 19: 16, 20: 16, 21: 16, 22: 16}
16 22