โค้ด Python ที่เร็วที่สุดสำหรับค้นหาชุดคำที่ชนะในเกมนี้

นี่คือเกมคำศัพท์จากชุดการ์ดกิจกรรมสำหรับเด็ก ถัดจากกฎด้านล่างคือโค้ดสำหรับค้นหาชุดคำสามคำ (triplet) ที่ดีที่สุดโดยใช้ /usr/share/dict/words ฉันคิดว่ามันเป็นปัญหาการเพิ่มประสิทธิภาพที่น่าสนใจ และสงสัยว่าจะมีใครหาวิธีปรับปรุงได้หรือไม่

กฎระเบียบ

เลือกตัวอักษรหนึ่งตัวจากแต่ละชุดด้านล่าง
เลือกคำโดยใช้ตัวอักษรที่เลือก (และอื่น ๆ )
ทำคะแนนคำว่า
- ตัวอักษรแต่ละตัวจากชุดที่เลือกจะได้รับหมายเลขที่แสดงพร้อมชุด (รวมซ้ำ)
- AEIOU นับ 0
- ตัวอักษรอื่น ๆ ทั้งหมดคือ -2
ทำซ้ำขั้นตอนที่ 1-3 ด้านบน (ไม่ใช้ตัวอักษรซ้ำในขั้นตอนที่ 1) อีกสองครั้ง
คะแนนสุดท้ายคือผลรวมของคะแนนสามคำ

ชุด

(ชุดที่ 1 คะแนน 1 คะแนนชุดที่ 2 คะแนน 2 คะแนน ฯลฯ )

รหัส:

from itertools import permutations
import numpy as np

# Points awarded for a chosen letter, keyed by the letter set it belongs to:
# the first set is worth 1 point, the second 2, and so on up to 7.
points = {
    letters: value
    for value, letters in enumerate(
        ('LTN', 'RDS', 'GBM', 'CHP', 'FWV', 'YKJ', 'QXZ'), 1)
}

def tonum(word):
    """Return the letter counts of ``word`` as a (26, 1) integer column.

    Row ``i`` holds the number of occurrences of the ``i``-th letter of the
    alphabet ('A' is row 0).  Returning a column lets the caller stack many
    words side by side and score them all with one matrix product.

    NOTE(review): ``word`` is assumed to consist of the upper-case letters
    A-Z only; other characters are not validated here.
    """
    # ``np.int`` was merely an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly (same dtype).
    word_array = np.zeros(26, dtype=int)
    for l in word:
        word_array[ord(l) - ord('A')] += 1
    return word_array.reshape((26, 1))

def to_score_array(letters):
    """Return the per-letter score table for one word as a (1, 26) matrix.

    ``letters`` holds the letters chosen for this word, ordered by set
    number: the letter at position ``i`` scores ``i + 1``.  The vowels
    AEIOU score 0 and every remaining letter scores -2.

    The result is an ``np.matrix`` so that ``table * counts`` is a matrix
    product with the stacked letter-count columns.
    """
    # ``np.int`` was merely an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly (same dtype).
    score_array = np.zeros(26, dtype=int) - 2
    for v in 'AEIOU':
        score_array[ord(v) - ord('A')] = 0
    # Chosen letters are written last so they override the -2 default.
    for idx, l in enumerate(letters):
        score_array[ord(l) - ord('A')] = idx + 1
    return np.matrix(score_array.reshape(1, 26))

def find_best_words():
    """Search every letter assignment for the best-scoring three words.

    Reads /usr/share/dict/words, keeps the entries whose first character is
    not upper-case and that are longer than four characters, and converts
    them to a 26 x N matrix of letter counts.  For every way of dealing the
    letters of the seven sets to three words, each word's score table is
    multiplied with that matrix and the best word per table is taken.

    A progress line is printed for every assignment, and each new best
    combination is printed as it is found.

    Returns:
        A tuple ``(bestwords, best)`` where ``bestwords`` is the winning
        3-tuple of words (``()`` if nothing scored above 0) and ``best`` is
        its total score.
    """
    from itertools import product  # local import: only needed here

    # ``with`` closes the word list as soon as it has been read.
    with open('/usr/share/dict/words') as source:
        wlist = [l.strip().upper() for l in source if l[0].lower() == l[0]]
    wlist = [l for l in wlist if len(l) > 4]
    orig = list(wlist)
    # Vowels always score 0, so they are stripped before counting letters.
    for rep in 'AEIOU':
        wlist = [l.replace(rep, '') for l in wlist]
    wlist = np.hstack([tonum(w) for w in wlist])

    best = 0
    ct = 0
    bestwords = ()
    # The first set keeps a fixed order; permuting the other six sets against
    # it already covers every distinct deal of letters to the three words.
    c1 = 'LTN'
    free_sets = ('RDS', 'GBM', 'CHP', 'FWV', 'YJK', 'QZX')
    # product() advances its right-most iterable fastest, i.e. the same
    # visiting order as six nested ``for`` loops.
    for c2, c3, c4, c5, c6, c7 in product(
            *(permutations(letters) for letters in free_sets)):
        vals = [to_score_array(''.join(s))
                for s in zip(c1, c2, c3, c4, c5, c6, c7)]
        ct += 1
        print('%s %s' % (ct, 6**6))
        scores1 = (vals[0] * wlist).A.flatten()
        scores2 = (vals[1] * wlist).A.flatten()
        scores3 = (vals[2] * wlist).A.flatten()
        # argmax is computed once per array; the maximum is read back from it.
        i1 = scores1.argmax()
        i2 = scores2.argmax()
        i3 = scores3.argmax()
        total = scores1[i1] + scores2[i2] + scores3[i3]
        if total > best:
            best = total
            bestwords = (orig[i1], orig[i2], orig[i3])
            print('%s %s %s %s' % (bestwords + (total,)))
    return bestwords, best


if __name__ == '__main__':
    # Time one complete run of the search (word-list loading included, since
    # find_best_words reads the file itself) and print the elapsed seconds.
    import timeit
    print timeit.timeit('print find_best_words()', 'from __main__ import find_best_words', number=1)

เวอร์ชันเมทริกซ์คือสิ่งที่ฉันคิดขึ้นมาได้หลังจากเขียนเวอร์ชันหนึ่งด้วย Python ล้วน (โดยใช้พจนานุกรมและให้คะแนนแต่ละคำแยกกัน) และอีกเวอร์ชันหนึ่งด้วย numpy แต่ใช้การทำดัชนีแทนการคูณเมทริกซ์

การเพิ่มประสิทธิภาพขั้นต่อไปคือการตัดสระออกจากการให้คะแนนทั้งหมด (และใช้ฟังก์ชัน ord() ที่ปรับเปลี่ยนแล้ว) แต่ฉันสงสัยว่ามีวิธีที่เร็วกว่านี้อีกหรือไม่

แก้ไข : เพิ่มรหัส timeit.timeit

แก้ไข : ฉันกำลังเพิ่มรางวัลซึ่งฉันจะให้การปรับปรุงใดที่ฉันชอบมากที่สุด (หรืออาจเป็นคำตอบหลายครั้ง แต่ฉันจะต้องเพิ่มชื่อเสียงบางอย่างถ้าเป็นกรณี)

fastest-code python optimization

— thouis
แหล่งที่มา

BTW ฉันเขียนโค้ดนี้เพื่อหาคำสามคำให้ลูกวัยแปดขวบของฉันจำไว้ใช้ตอนเล่นเกมกับแม่ของเขา ตอนนี้ฉันรู้แล้วว่า xylopyrography หมายถึงอะไร

นี่เป็นคำถามที่สนุก ฉันคิดว่าคุณสามารถทำให้มีแนวโน้มที่จะได้รับคำตอบมากขึ้นหากคุณให้สิ่งต่อไปนี้: (1) ลิงก์ไปยังรายการคำศัพท์ออนไลน์เพื่อให้ทุกคนทำงานกับชุดข้อมูลเดียวกัน (2) ใส่โซลูชันของคุณในฟังก์ชั่นเดียว (3) เรียกใช้ฟังก์ชันนั้นโดยใช้โมดูล time-it เพื่อแสดงการกำหนดเวลา (4) ตรวจสอบให้แน่ใจว่าได้วางการโหลดข้อมูลพจนานุกรมไว้นอกฟังก์ชั่นเพื่อไม่ให้เราทดสอบความเร็วของดิสก์ ผู้คนสามารถใช้รหัสที่มีอยู่เป็นกรอบสำหรับการเปรียบเทียบโซลูชันของพวกเขา

ฉันจะเขียนซ้ำเพื่อใช้ timeit แต่สำหรับการเปรียบเทียบที่ยุติธรรมฉันต้องใช้เครื่องของตัวเอง (ซึ่งฉันยินดีที่จะทำเพื่อคนที่โพสต์โซลูชั่น) รายการคำควรจะสามารถใช้ได้ในระบบส่วนใหญ่ แต่ถ้าไม่ได้มีหลายที่นี่: wordlist.sourceforge.net

การเปรียบเทียบที่เป็นธรรมอาจเกิดขึ้นได้หากผู้ใช้แต่ละคนใช้โซลูชันของคุณและโซลูชันที่โพสต์อื่น ๆ เปรียบเทียบกับเครื่องของตนเอง จะมีความแตกต่างข้ามแพลตฟอร์ม แต่โดยทั่วไปวิธีนี้ใช้ได้ผล

หืมในกรณีนี้ฉันสงสัยว่านี่เป็นไซต์ที่ถูกต้องหรือไม่ ฉันคิดว่าดังนั้นจะเป็นแบบที่ดีที่สุด

— Joey

คำตอบ:

ด้วยความคิดของ Keith ในการคำนวณคะแนนที่ดีที่สุดที่เป็นไปได้สำหรับแต่ละคำนั้นฉันจึงจัดการลดเวลาในการประมวลผลให้เหลือ 0.7 วินาทีบนคอมพิวเตอร์ของฉัน (ใช้รายการ 75,288 คำ)

เคล็ดลับคือการใช้ชุดคำศัพท์ที่จะเล่นแทนที่จะใช้ตัวอักษรทั้งหมดที่เลือก เราสามารถเพิกเฉยได้ทั้งหมดยกเว้นคำบางคำ (203 ใช้รายการคำศัพท์ของฉัน) เพราะพวกเขาไม่สามารถได้คะแนนสูงกว่าที่เราพบ เวลาดำเนินการเกือบทั้งหมดใช้เวลาคำนวณคะแนนคำล่วงหน้าแล้ว

Python 2.7:

import collections
import itertools


# Path of the word list, one word per line.
WORDS_SOURCE = '../word lists/wordsinf.txt'

# Number of words played in one round.
WORDS_PER_ROUND = 3
# Letter sets in ascending order of value.
LETTER_GROUP_STRS = ['LTN', 'RDS', 'GBM', 'CHP', 'FWV', 'YKJ', 'QXZ']
# The same sets, each as a list of single letters.
LETTER_GROUPS = list(map(list, LETTER_GROUP_STRS))
# (letters, points) pairs: the first set is worth 1 point, the next 2, ...
GROUP_POINTS = list(zip(LETTER_GROUPS, range(1, len(LETTER_GROUPS) + 1)))
# Score of every set letter in a word that was not the chosen one.
POINTS_IF_NOT_CHOSEN = -2


def best_word_score(word):
    """Return the highest score ``word`` can reach on its own.

    For every letter group the letter occurring most often in ``word`` is
    treated as the chosen one and earns the group's points per occurrence;
    all other occurrences of that group's letters cost
    POINTS_IF_NOT_CHOSEN each.
    """
    chosen_points = 0
    unchosen_letters = 0
    for letters, value in GROUP_POINTS:
        occurrences = [word.count(letter) for letter in letters]
        most_frequent = max(occurrences)
        chosen_points += value * most_frequent
        # Whatever the best letter does not cover is penalised later.
        unchosen_letters += sum(occurrences) - most_frequent
    return chosen_points + POINTS_IF_NOT_CHOSEN * unchosen_letters

def best_total_score(words):
    """Return the best score possible for a given list of words.

    For each letter group every word receives a different letter of that
    group; the assignment covering the most letter occurrences is used.
    Covered occurrences earn the group's points, all remaining occurrences
    of group letters cost POINTS_IF_NOT_CHOSEN each.

    It is fine if the number of words provided is not WORDS_PER_ROUND. Only
    the words provided are scored; an empty list scores 0.

    Raises:
        ValueError: if more words are given than a group has letters, since
            no assignment of distinct letters exists then.
    """
    num_words = len(words)
    total_score = 0
    total_not_chosen = 0

    for group, points_if_chosen in GROUP_POINTS:
        if num_words > len(group):
            raise ValueError(
                'cannot give %d words distinct letters from a group of %d'
                % (num_words, len(group)))

        # Structure:  letter_counts[word_index][letter] = count
        letter_counts = [
            dict((letter, word.count(letter)) for letter in group)
            for word in words
        ]
        letter_counts_sum = sum(
            sum(counts.values()) for counts in letter_counts)

        # Try every way of dealing distinct group letters to the words.
        # max() over the candidates avoids seeding the running maximum with
        # None, which only compared against integers under Python 2 rules.
        max_chosen = max(
            sum(counts[letter]
                for counts, letter in zip(letter_counts, letters))
            for letters in itertools.permutations(group, num_words))

        total_score += points_if_chosen * max_chosen
        total_not_chosen += letter_counts_sum - max_chosen

    total_score += POINTS_IF_NOT_CHOSEN * total_not_chosen
    return total_score


def get_words():
    """Read WORDS_SOURCE and return its lines as upper-case words.

    Trailing whitespace (including the line break) is removed from each
    line before it is upper-cased.
    """
    words = []
    with open(WORDS_SOURCE, 'r') as word_file:
        for raw_line in word_file:
            words.append(raw_line.rstrip().upper())
    return words

def get_words_by_score():
    """Group all valid words by their best possible individual score.

    Returns a ``collections.defaultdict(list)`` whose keys are scores and
    whose values are the words that can reach exactly that score, in
    word-list order.
    """
    grouped = collections.defaultdict(list)
    for candidate in get_words():
        reachable = best_word_score(candidate)
        grouped[reachable].append(candidate)
    return grouped


def get_winning_words():
    """Return a list of words for an optimal play.

    Words are grouped by their best possible individual score (see
    get_words_by_score) and the groups are ordered from the highest score
    down.  Combinations of WORDS_PER_ROUND words are enumerated in
    lexicographic order of word position, and a step is skipped whenever an
    upper bound on the totals it could reach does not exceed the best total
    found so far.

    NOTE(review): ``best_score`` starts as None and is compared with
    integers below.  That works under Python 2 comparison rules (None sorts
    below every number); Python 3 raises TypeError for such comparisons.
    """

    # A word's position is a tuple of its score's index and the index of the
    # word within the list of words with this score.
    #
    # word played: A word in the context of a combination of words to be played
    # word chosen: A word in the context of the list it was picked from

    words_by_score = get_words_by_score()
    num_word_scores = len(words_by_score)
    # Distinct scores, best first; index into this list is the "score index".
    word_scores = sorted(words_by_score, reverse=True)
    words_by_position = []
    # Structure:  words_by_position[score_index][word_index] = word
    num_words_for_scores = []
    for score in word_scores:
        words = words_by_score[score]
        words_by_position.append(words)
        num_words_for_scores.append(len(words))

    # Go through the combinations of words in lexicographic order by word
    # position to find the best combination.
    best_score = None
    # Start with every played word set to the first word of the top score.
    positions = [(0, 0)] * WORDS_PER_ROUND
    words = [words_by_position[0][0]] * WORDS_PER_ROUND
    # scores_before_words[i] is the best total of the first i played words.
    scores_before_words = []
    for i in xrange(WORDS_PER_ROUND):
        scores_before_words.append(best_total_score(words[:i]))
    while True:
        # Keep track of the best possible combination of words so far.
        score = best_total_score(words)
        if score > best_score:
            best_score = score
            best_words = words[:]

        # Go to the next combination of words that could get a new best score.
        # The last played word is advanced first; earlier ones are tried only
        # when advancing a later one is impossible or pointless.
        for word_played_index in reversed(xrange(WORDS_PER_ROUND)):
            # Go to the next valid word position.
            score_index, word_chosen_index = positions[word_played_index]
            word_chosen_index += 1
            if word_chosen_index == num_words_for_scores[score_index]:
                # This score's list is exhausted: move on to the next score.
                score_index += 1
                if score_index == num_word_scores:
                    # No positions left for this played word.
                    continue
                word_chosen_index = 0

            # Check whether the new combination of words could possibly get a
            # new best score.
            # Bound: best total of the unchanged leading words plus the new
            # position's individual score for each word that gets replaced.
            num_words_changed = WORDS_PER_ROUND - word_played_index
            score_before_this_word = scores_before_words[word_played_index]
            further_points_limit = word_scores[score_index] * num_words_changed
            score_limit = score_before_this_word + further_points_limit
            if score_limit <= best_score:
                continue

            # Update to the new combination of words.
            # This word and every later one are all set to the new position.
            position = score_index, word_chosen_index
            positions[word_played_index:] = [position] * num_words_changed
            word = words_by_position[score_index][word_chosen_index]
            words[word_played_index:] = [word] * num_words_changed
            # Refresh the cached prefix totals that include a changed word.
            for i in xrange(word_played_index+1, WORDS_PER_ROUND):
                scores_before_words[i] = best_total_score(words[:i])
            break
        else:
            # None of the remaining combinations of words can get a new best
            # score.
            break

    return best_words


def main():
    """Find an optimal play and print it, followed by its total score."""
    best_play = get_winning_words()
    # A single parenthesised argument prints the same text on Python 2 and 3.
    print(best_play)
    print(best_total_score(best_play))

# Run the search only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

สิ่งนี้จะคืนผลเฉลย ['KNICKKNACK', 'RAZZMATAZZ', 'POLYSYLLABLES'] ด้วยคะแนน 95 เมื่อเพิ่มคำจากโซลูชันของ Keith เข้าไปในรายการคำ ฉันได้ผลลัพธ์เหมือนกับเขา และเมื่อเพิ่มคำว่า "xylopyrography" ของ thouis ฉันได้ ['XYLOPYROGRAPHY', 'KNICKKNACKS', 'RAZZMATAZZ'] ด้วยคะแนน 105

— flornquake
แหล่งที่มา

นี่เป็นแนวคิดหนึ่ง - คุณสามารถหลีกเลี่ยงการตรวจสอบคำจำนวนมากได้โดยสังเกตว่าคำส่วนใหญ่มีคะแนนแย่ สมมติว่าคุณพบการเล่นที่ค่อนข้างดีซึ่งทำให้คุณได้ 50 คะแนน ดังนั้นการเล่นที่ได้มากกว่า 50 คะแนนจะต้องมีคำอย่างน้อยหนึ่งคำที่ได้ ceil(51/3) = 17 คะแนนขึ้นไป ดังนั้นคำใด ๆ ที่ไม่สามารถทำได้ถึง 17 คะแนนจึงข้ามไปได้

นี่คือรหัสบางอย่างที่ทำข้างต้น เราคำนวณคะแนนที่ดีที่สุดเท่าที่จะเป็นไปได้สำหรับแต่ละคำในพจนานุกรมและจัดเก็บไว้ในอาร์เรย์ที่จัดทำดัชนีโดยคะแนน จากนั้นเราใช้อาร์เรย์นั้นเพื่อตรวจสอบเฉพาะคำที่มีคะแนนขั้นต่ำที่ต้องการ

from itertools import permutations
import time

# Value of every letter: vowels are worth 0, the letters of the k-th set are
# worth k.  The position of each string below is the value of its letters.
S = {
    letter: value
    for value, letters in enumerate(
        ('AEIOU', 'LTN', 'RDS', 'GBM', 'CHP', 'FWV', 'YKJ', 'QXZ'))
    for letter in letters
}

def best_word(min, s):
    """Return ``(score, word)`` for the best word under one letter choice.

    Only the buckets of the module-level ``score_to_words`` table from index
    ``min`` upwards are examined, so words that cannot reach ``min`` points
    are never scored.

    Args:
        min: lowest bucket index worth looking at; values below 0 are
            treated as 0.
        s: the seven chosen letters, ordered by set number (``s[0]`` is
            worth 1 point, ``s[6]`` is worth 7).

    Returns:
        ``(best_score, best_word)``; ``(0, '')`` when no examined word
        scores above 0.  Among equal scores the word met first wins.
    """
    best_score = 0
    best = ''
    # Clamp the start: a negative index would wrap around to the end of the
    # table and rescan the highest buckets before starting again from 0.
    # Slicing to the end of the table also avoids repeating its length here.
    for bucket in score_to_words[max(min, 0):]:
        for w in bucket:
            # Every letter starts at -2; vowels get +2 back (net 0) and the
            # chosen letter of set k gets k + 2 back (net k).
            score = (-2*len(w)+2*(w.count('A')+w.count('E')+w.count('I')+w.count('O')+w.count('U')) +
                      3*w.count(s[0])+4*w.count(s[1])+5*w.count(s[2])+6*w.count(s[3])+7*w.count(s[4])+
                      8*w.count(s[5])+9*w.count(s[6]))
            if score > best_score:
                best_score = score
                best = w
    return (best_score, best)

def load_words():
    """Fill the global ``score_to_words`` table from /usr/share/dict/words.

    Entries whose first character is not upper-case are upper-cased and
    filed under the sum of their letter values in ``S``.  That sum is the
    most a word could ever score, so ``score_to_words[i]`` holds the words
    that can reach at most ``i`` points.  Entries containing a character
    that has no value in ``S`` (apostrophes, accented letters, ...) are
    skipped.  The size of every non-empty bucket is printed.

    NOTE(review): the table has 100 buckets; a word whose letter values sum
    to 100 or more would raise IndexError.
    """
    global score_to_words
    # ``with`` closes the word list as soon as it has been read.
    with open('/usr/share/dict/words') as source:
        wlist = [l.strip().upper() for l in source if l[0].lower() == l[0]]
    score_to_words = [[] for _ in range(100)]
    for w in wlist:
        try:
            bound = sum(S[c] for c in w)
        except KeyError:
            # Not purely A-Z: the word cannot be scored, leave it out.
            continue
        score_to_words[bound].append(w)
    for i, bucket in enumerate(score_to_words):
        if bucket:
            print('%s %s' % (i, len(bucket)))

def find_best_words():
    """Search every letter assignment for the best-scoring three words.

    Loads the word table, then for every way of dealing the letters of the
    seven sets to three words looks up the best word for each of the three
    letter choices.  The running best total is turned into minimum scores
    for the lookups, so low-scoring words are skipped.

    A timestamped progress line is printed for every arrangement of the
    first three sets, and each new best combination is printed as found.

    Returns:
        A tuple ``(bestwords, best)``: the winning 3-tuple of words (``()``
        if nothing scored above 0) and its total score.
    """
    from itertools import product  # local import: only needed here

    load_words()
    best = 0
    bestwords = ()
    # product() advances its right-most iterable fastest, i.e. the same
    # visiting order as nested ``for`` loops over the permutations.
    for c1, c2, c3 in product(
            *(permutations(letters) for letters in ('LTN', 'RDS', 'GBM'))):
        print('%s %s %s %s' % (time.ctime(), c1, c2, c3))
        for c4, c5, c6, c7 in product(
                *(permutations(letters)
                  for letters in ('CHP', 'FWV', 'YJK', 'QZX'))):
            # sets[k] holds the seven letters chosen for word k.  list()
            # keeps the result indexable where zip() returns an iterator.
            sets = list(zip(c1, c2, c3, c4, c5, c6, c7))
            # Every ordering of the three words is visited, so it suffices
            # to look for s1 >= s2 >= s3: to beat ``best`` the first word
            # needs more than best/3, the second more than half of what is
            # still missing, and the third all of the rest plus one.
            # ``//`` is floor division on both Python 2 and Python 3.
            (s1, w1) = best_word((best + 3) // 3, sets[0])
            (s2, w2) = best_word((best - s1 + 2) // 2, sets[1])
            (s3, w3) = best_word(best - s1 - s2 + 1, sets[2])
            score = s1 + s2 + s3
            if score > best:
                best = score
                bestwords = (w1, w2, w3)
                print('%s %s %s %s' % (score, w1, w2, w3))
    return bestwords, best


if __name__ == '__main__':
    # Time one complete run of the search (word-list loading included, since
    # find_best_words calls load_words itself) and print the elapsed seconds.
    import timeit
    print timeit.timeit('print find_best_words()', 'from __main__ import find_best_words', number=1)

คะแนนขั้นต่ำจะขยับขึ้นไปถึง 100 อย่างรวดเร็ว ซึ่งหมายความว่าเราต้องพิจารณาเฉพาะคำที่ได้ 33 คะแนนขึ้นไปเท่านั้น ซึ่งเป็นเพียงส่วนน้อยมากของทั้งหมด (ไฟล์ /usr/share/dict/words ของฉันมีคำที่ใช้ได้ 208662 คำ มีเพียง 1723 คำที่ได้ 33 คะแนนขึ้นไป = 0.8%) โปรแกรมใช้เวลาทำงานบนเครื่องของฉันประมาณครึ่งชั่วโมงและได้ผลลัพธ์:

(('MAXILLOPREMAXILLARY', 'KNICKKNACKED', 'ZIGZAGWISE'), 101)

— Keith Randall
แหล่งที่มา

ดี ฉันจะเพิ่มเข้าไปในโซลูชันเมทริกซ์ (ลบคำเนื่องจากคะแนนของพวกเขาต่ำเกินไป) แต่นี่ดีกว่าโซลูชันหลามบริสุทธิ์ที่ฉันสร้างขึ้นมาอย่างมีนัยสำคัญ

— thouis

ฉันไม่แน่ใจว่าเคยเห็นลูปซ้อนกันหลายชั้นขนาดนี้มาก่อนหรือเปล่า

— Peter Olson

การรวมความคิดของคุณเข้ากับการให้คะแนนแมทริกซ์ (และขอบเขตบนที่เข้มงวดมากขึ้นกับคะแนนที่ดีที่สุด) จะลดเวลาลงเหลือประมาณ 80 วินาทีในเครื่องของฉัน (จากประมาณหนึ่งชั่วโมง) รหัสที่นี่

— thouis

ส่วนที่ดีของเวลานั้นอยู่ในการคำนวณคะแนนล่วงหน้าที่ดีที่สุดเท่าที่จะเป็นไปได้ซึ่งสามารถทำได้เร็วกว่ามาก

— thouis