In the following, we use the benchmark instance labels provided by GBD’s meta.db, and the solver runtimes in the 2023 SAT competition.
from gbd_core.api import GBD
import pandas as pd
def get_solvers():
with GBD(['data/sc2023/results_main_detailed.csv']) as gbd:
return [ s for s in gbd.get_features() if s not in ["aresult", "vresult"] ]
def get_sc23_data(runtimes, metadata, addvbs=False):
with GBD(['data/meta.db', 'data/sc2023/results_main_detailed.csv']) as gbd:
df = gbd.query("track=main_2023", resolve=runtimes + metadata, collapse='min')
# convert runtimes to numeric values
df[runtimes] = df[runtimes].apply(pd.to_numeric)
# penalize timeouts
for solver in runtimes:
df.loc[df[solver] >= 5000, solver] = 10000
if addvbs:
# compute the vbs score
df["vbs"] = df[runtimes].min(axis=1)
return df
Next, we determine the best 3-portfolio of solvers in the 2023 SAT competition.
from itertools import combinations
def pscore(df: pd.DataFrame, solvers: list[str]):
return df[solvers].min(axis=1).mean()
def pscores_all(df: pd.DataFrame, solvers: list[str], k: int):
return sorted([ (comb, pscore(df, list(comb))) for comb in combinations(solvers, k) ], key=lambda k : k[1])
def pscores_ext(df: pd.DataFrame, solvers: list[str], tuples: list[tuple[str]]):
tupset = set(frozenset(comb + (s,)) for comb in tuples for s in solvers if s not in comb)
return sorted([ (tuple(comb), pscore(df, list(comb))) for comb in tupset ], key=lambda k : k[1])
beam_width: int = 10
pf3 = get_solvers()
runtimes = get_sc23_data(pf3, [])
pfs1 = pscores_all(runtimes, pf3, 1)
pfs2 = pscores_all(runtimes, pf3, 2)[:beam_width]
pfs3 = pscores_ext(runtimes, pf3, [tup[0] for tup in pfs2])[:beam_width]
pf1, pf1score = list(pfs1[0][0]), round(pfs1[0][1], 2)
pf2, pf2score = list(pfs2[0][0]), round(pfs2[0][1], 2)
pf3, pf3score = list(pfs3[0][0]), round(pfs3[0][1], 2)
print("Single-best Solver and Best 2- and 3-Portfolios")
print(pf1score, pf1)
ordered = [ s[0][0] for s in pfs1 ]
print(pf2score, sorted(pf2, key=lambda s : ordered.index(s)))
print(pf3score, sorted(pf3, key=lambda s : ordered.index(s)))
Single-best Solver and Best 2- and 3-Portfolios
3274.01 ['SBVA_sbva_cadical']
2434.72 ['SBVA_sbva_cadical', 'Kissat_MAB_prop_pr_no_sym']
2138.96 ['SBVA_sbva_cadical', 'Kissat_MAB_prop_pr_no_sym', 'BreakID_kissat_low_sh']
Determine the PAR-2 scores per instance category for each solver in the best 3-portfolio.
df = get_sc23_data(pf3, ["family"])
# group families with less than 5 instances into a single group
misc = df.groupby("family").count().query("hash < 5").index.tolist()
df.replace(misc, "miscellaneous", inplace=True)
# compute family sizes and family-wise scores
counts = df.groupby('family').count()["hash"].rename("count")
groups = df.groupby('family').mean(numeric_only=True)
tab = groups.merge(counts, on='family', how='left').reset_index()
# sort families by the difference between the best and worst solver
tab["diff"] = tab[pf3].max(axis=1) - tab[pf3].min(axis=1)
tab.sort_values(by="diff", ascending=False, inplace=True)
tab = tab[["family", "count"] + pf3].reset_index(drop=True)
tab.to_markdown("family_scores.md", index=False, floatfmt=".2f")
# output family_scores.md
with open("family_scores.md", "r") as f:
print(f.read())
| family | count | Kissat_MAB_prop_pr_no_sym | SBVA_sbva_cadical | BreakID_kissat_low_sh |
|:-----------------------------|--------:|----------------------------:|--------------------:|------------------------:|
| interval-matching | 20 | 0.15 | 10000.00 | 10000.00 |
| or_randxor | 5 | 10000.00 | 21.82 | 103.93 |
| hashtable-safety | 20 | 194.75 | 797.46 | 10000.00 |
| satcoin | 15 | 10000.00 | 1395.53 | 10000.00 |
| set-covering | 20 | 5761.57 | 722.01 | 262.39 |
| cryptography-ascon | 20 | 5628.35 | 356.82 | 2673.77 |
| grs-fp-comm | 17 | 3435.82 | 3649.90 | 8258.64 |
| reg-n | 5 | 6295.16 | 10000.00 | 10000.00 |
| mutilated-chessboard | 12 | 1656.50 | 3194.54 | 5135.77 |
| profitable-robust-production | 20 | 3946.63 | 2470.42 | 5031.37 |
| hardware-verification | 8 | 1558.52 | 2832.05 | 3964.51 |
| register-allocation | 20 | 5.50 | 101.20 | 2016.39 |
| tseitin | 11 | 8193.40 | 8196.91 | 6393.20 |
| social-golfer | 20 | 7665.62 | 7555.16 | 9013.05 |
| miscellaneous | 70 | 2563.98 | 3164.75 | 3918.52 |
| pigeon-hole | 8 | 6381.16 | 5261.85 | 5128.03 |
| school-timetabling | 19 | 1266.09 | 1399.65 | 2371.42 |
| brent-equations | 19 | 408.33 | 232.86 | 1133.50 |
| miter | 11 | 3157.68 | 3134.91 | 3723.05 |
| argumentation | 20 | 4144.27 | 3693.93 | 3820.58 |
| subsumptiontest | 5 | 84.60 | 230.22 | 89.14 |
| planning | 6 | 91.26 | 6.97 | 10.98 |
| quasigroup-completion | 5 | 60.78 | 9.33 | 3.43 |
| cryptography | 7 | 1577.96 | 1578.46 | 1570.64 |
| cryptography-simon | 17 | 10000.00 | 10000.00 | 10000.00 |