7

子集和问题以 NP 完全而闻名,但是有各种技巧可以快速解决问题的版本。

通常的动态规划算法需要随目标总和增长的空间。我的问题是:我们可以减少这个空间需求吗?

我正在尝试解决具有少量元素但目标和非常大的子集和问题。元素的数量对于指数时间算法(和快捷方法)来说太大了,对于通常的动态规划方法来说目标和太大。

考虑这个说明问题的玩具问题。给定集合A = [2, 3, 6, 8],找出总和为 的子集的数量target = 11。枚举所有子集我们看到的答案是 2:(3, 8)(2, 3, 6)

当然,动态编程解决方案给出了相同的结果 -ways[11]返回2

def subset_sum(A, target):
    ways = [0] * (target + 1)
    ways[0] = 1
    ways_next = ways[:]
    for x in A:
        for j in range(x, target + 1):
            ways_next[j] += ways[j - x]
        ways = ways_next[:]

    return ways[target]

target = 1100现在考虑以set的总和为目标A = [200, 300, 600, 800]。显然仍然有 2 个解决方案:(300, 800)(200, 300, 600). 但是,该ways阵列增长了 100 倍。

填写动态编程存储数组时是否可以跳过某些权重?对于我的示例问题,我可以计算输入集的最大公分母,然后将所有项目减少该常数,但这不适用于我的实际应用程序。

这个 SO question是相关的,但这些答案没有使用我想到的方法。Akshay此页面上的第二条评论说:

...在 n 非常小(例如 6)且总和非常大(例如 100 万)的情况下,空间复杂度将太大。为了避免大的空间复杂度,可以使用 n HASHTABLES。

这似乎更接近我正在寻找的东西,但我似乎无法真正实现这个想法。这真的可能吗?

编辑添加:要解决的问题的较小示例。有1个解决方案。

target = 5213096522073683233230240000
A = [2316931787588303659213440000,
     1303274130518420808307560000,
     834095443531789317316838400,
     579232946897075914803360000,
     425558899761116998631040000,
     325818532629605202076890000,
     257436865287589295468160000,
     208523860882947329329209600,
     172333769324749858949760000,
     144808236724268978700840000,
     123386899930738064691840000,
     106389724940279249657760000,
     92677271503532146368537600,
     81454633157401300519222500,
     72153585080604612224640000,
     64359216321897323867040000,
     57762842349846905631360000,
     52130965220736832332302400,
     47284322195679666514560000,
     43083442331187464737440000,
     39418499221729173786240000,
     36202059181067244675210000,
     33363817741271572692673536,
     30846724982684516172960000,
     28604096143065477274240000,
     26597431235069812414440000,
     24794751591313594450560000,
     23169317875883036592134400,
     21698632766175580575360000,
     20363658289350325129805625,
     19148196591638873216640000,
     18038396270151153056160000,
     17022355990444679945241600]

一个真正的问题是:

target = 262988806539946324131984661067039976436265064677212251086885351040000
A = [116883914017753921836437627140906656193895584300983222705282378240000,
     65747201634986581032996165266759994109066266169303062771721337760000,
     42078209046391411861117545770726396229802410348353960173901656166400,
     29220978504438480459109406785226664048473896075245805676320594560000,
     21468474003260924418937523352411426647858372626711204170357987840000,
     16436800408746645258249041316689998527266566542325765692930334440000,
     12987101557528213537381958571211850688210620477887024745031375360000,
     10519552261597852965279386442681599057450602587088490043475414041600,
     8693844844295746252297013588993057072273225278585528961549928960000,
     7305244626109620114777351696306666012118474018811451419080148640000,
     6224587137040149683597270084426981690799173128454727836375984640000,
     5367118500815231104734380838102856661964593156677801042589496960000,
     4675356560710156873457505085636266247755823372039328908211295129600,
     4109200102186661314562260329172499631816641635581441423232583610000,
     3639983481521748430892521260443459881470796742937193786669693440000,
     3246775389382053384345489642802962672052655119471756186257843840000,
     2914003396564502206448583502127866774917064428556368433095682560000,
     2629888065399463241319846610670399764362650646772122510868853510400,
     2385386000362324935437502594712380738650930291856800463373109760000,
     2173461211073936563074253397248264268068306319646382240387482240000,
     1988573206351200938616141104476672789688204647842814753019927040000,
     1826311156527405028694337924076666503029618504702862854770037160000,
     1683128361855656474444701830829055849192096413934158406956066246656,
     1556146784260037420899317521106745422699793282113681959093996160000,
     1443011284169801504153550952356872298690068941987447193892375040000,
     1341779625203807776183595209525714165491148289169450260647374240000,
     1250838556670374906691960338012080744048823137584838292922165760000,
     1168839140177539218364376271409066561938955843009832227052823782400,
     1094646437211014876720019400903392201607763016346356924399106560000,
     1027300025546665328640565082293124907954160408895360355808145902500,
     965982760477305139144112620999228563585913919842836551283325440000,
     909995870380437107723130315110864970367699185734298446667423360000,
     858738960130436976757500934096457065914334905068448166814319513600,
     811693847345513346086372410700740668013163779867939046564460960000,
     768411414287644482489363509326632509674989232073666182868912640000,
     728500849141125551612145875531966693729266107139092108273920640000,
     691620793004461075955252231602997965644352569828303092930664960000,
     657472016349865810329961652667599941090662661693030627717213377600,
     625791330255672395317036671188673352614551016483550865168079360000,
     596346500090581233859375648678095184662732572964200115843277440000,
     568931977371436071675467087219123799753953628290345594563299840000,
     543365302768484140768563349312066067017076579911595560096870560000,
     519484062301128541495278342848474027528424819115480989801255014400,
     497143301587800234654035276119168197422051161960703688254981760000,
     476213321032044045508347054897310957784092466595223632570186240000,
     456577789131851257173584481019166625757404626175715713692509290000,
     438132122515529069774235170457376054037925971973698044293020160000,
     420782090463914118611175457707263962298024103483539601739016561664,
     404442609057972047876946806715939986830088526993021531852188160000,
     389036696065009355224829380276686355674948320528420489773499040000,
     374494562534633427030238036407319297168052779889230688624970240000,
     360752821042450376038387738089218074672517235496861798473093760000,
     347753793771829850091880543559722282890929011143421158461997158400,
     335444906300951944045898802381428541372787072292362565161843560000,
     323778155173833578494287055791985197213007158728485381455075840000,
     312709639167593726672990084503020186012205784396209573230541440000,
     302199145693704480473409550206308504954053507241841138853071360000,
     292209785044384804591094067852266640484738960752458056763205945600,
     282707666261699891568916593460940582033071824431295083135592960000,
     273661609302753719180004850225848050401940754086589231099776640000,
     265042888929147215048611399412486748738992254650755607041456640000,
     256825006386666332160141270573281226988540102223840088952036475625,
     248983485481605987343890803377079267631966925138189113455039385600,
     241495690119326284786028155249807140896478479960709137820831360000,
     234340660761814501342824380545368657996226388663143017230461440000,
     227498967595109276930782578777716242591924796433574611666855840000,
     220952578483466770957349011608519198854244960871423861446658560000,
     214684740032609244189375233524114266478583726267112041703579878400,
     208679870295533683104133831435857945991878646837700655494453760000,
     202923461836378336521593102675185167003290944966984761641115240000,
     197401994025105141026072179446079922264038329650750423033879040000,
     192102853571911120622340877331658127418747308018416545717228160000,
     187014262428406274938300203425450649910232934881573156328451805184,
     182125212285281387903036468882991673432316526784773027068480160000,
     177425404985627474536673746714144021883127046501745489011223040000,
     172905198251115268988813057900749491411088142457075773232666240000,
     168555556186474170249629649778586749838977769381324948621621760000,
     164368004087466452582490413166899985272665665423257656929303344400]
4

1 回答 1

5

在您链接到的特定评论中,建议使用哈希表仅存储实际作为某个子集的总和出现的值。在最坏的情况下,这是元素数量的指数,因此它基本上等同于您已经提到并排除的蛮力方法。

一般来说,这个问题有两个参数——集合中元素的数量和目标总和的大小。朴素的蛮力在第一个方面是指数的,而标准动态规划解决方案在第二个方面是指数的。当其中一个参数很小时,这很有效,但是您已经指出这两个参数对于指数解决方案来说都太大了。因此,您会遇到问题的“困难”一般情况。

大多数 NP 完全问题都有一些潜在的图,无论是隐式的还是显式的。使用图分区和 DP,它可以在图的树宽中以指数方式求解,但在树宽保持不变的情况下,图的大小只能是多项式。当然,如果不访问您的数据,就不可能说出底层图形可能是什么样子,或者它是否属于具有有界树宽的图形类别之一,因此可以有效地解决。

编辑:我刚刚写了下面的代码来说明我通过减少它来减少它的意思。下面的代码在不到一秒的时间内解决了您的第一个问题,但它不适用于更大的问题(尽管它确实将其简化为n=57, log(t)=68)。

target = 5213096522073683233230240000
A = [2316931787588303659213440000,
     1303274130518420808307560000,
     834095443531789317316838400,
     579232946897075914803360000,
     425558899761116998631040000,
     325818532629605202076890000,
     257436865287589295468160000,
     208523860882947329329209600,
     172333769324749858949760000,
     144808236724268978700840000,
     123386899930738064691840000,
     106389724940279249657760000,
     92677271503532146368537600,
     81454633157401300519222500,
     72153585080604612224640000,
     64359216321897323867040000,
     57762842349846905631360000,
     52130965220736832332302400,
     47284322195679666514560000,
     43083442331187464737440000,
     39418499221729173786240000,
     36202059181067244675210000,
     33363817741271572692673536,
     30846724982684516172960000,
     28604096143065477274240000,
     26597431235069812414440000,
     24794751591313594450560000,
     23169317875883036592134400,
     21698632766175580575360000,
     20363658289350325129805625,
     19148196591638873216640000,
     18038396270151153056160000,
     17022355990444679945241600]

import itertools, time
from fractions import gcd

def gcd_r(seq):
     return reduce(gcd, seq)

def miniSolve(t, vals):
     vals = [x for x in vals if x and x <= t]
     for k in range(len(vals)):
          for sub in itertools.combinations(vals, k):
               if sum(sub) == t:
                    return sub
     return None

def tryMod(n, state, answer):
     t, vals, mult = state
     mods = [x%n for x in vals if x%n]
     if (t%n or mods) and sum(mods) < n:
          print 'Filtering with', n
          print t.bit_length(), len(vals)
     else:
          return state

     newvals = list(vals)
     tmod = t%n
     if not tmod:
          for x in vals:
               if x%n:
                    newvals.remove(x)
     else:
          if len(set(mods)) != len(mods):
               #don't want to deal with the complexity of multisets for now
               print 'skipping', n
          else:
               mini = miniSolve(tmod, mods)
               if mini is None:
                    return None
               mini = set(mini)
               for x in vals:
                    mod = x%n
                    if mod:
                         if mod in mini:
                              t -= x
                              answer.add(x*mult)
                         newvals.remove(x)
     g = gcd_r(newvals + [t])
     t = t//g
     newvals = [x//g for x in newvals]
     mult *= g
     return (t, newvals, mult)

def solve(t, vals):
     answer = set()
     mult = 1
     for d in itertools.count(2):
          if not t:
               return answer
          elif not vals or t < min(vals):
               return None #no solution'
          res = tryMod(d, (t, vals, mult), answer)
          if res is None:
               return None
          t, vals, mult = res
          if len(vals) < 23:
               break

          if (d % 10000) == 0:
               print 'd', d

     #don't want to deal with the complexity of multisets for now
     assert(len(set(vals)) == len(vals))
     rest = miniSolve(t, vals)
     if rest is None:
          return None
     answer.update(x*mult for x in rest)
     return answer

start_t = time.time()
answer = solve(target, A)
assert(answer <= set(A) and sum(answer) == target)
print answer
于 2013-08-25T19:34:57.623 回答