Synopsis Language Mismatch Highlighter

Highlights synopsis fields when language doesn't match title metadata language for multiple languages

// ==UserScript==
// @name         Synopsis Language Mismatch Highlighter
// @namespace    http://tampermonkey.net/
// @version      3.5
// @description  Highlights synopsis fields when language doesn't match title metadata language for multiple languages
// @match        https://kdpow.amazon.com/work/vdp/baseline/*
// @match        https://crisp.amazon.com/details/*
// @match        https://kdpow.amazon.com/work/pv/baseline/*
// @run-at       document-start
// @grant        none
// @author       aakpooni
// ==/UserScript==

(function() {
    'use strict';

    const languagePatterns = {
        english: {
            variants: ['en', 'en-us', 'en-gb', 'en-ca', 'en-au', 'en-nz', 'en-ie', 'en-za', 'en-in'],
            words: /\b(the|and|for|that|with|this|from|they|will|would|what|about|there|their|have)\b/gi,
            chars: /^[a-z\s.,!?'"-]+$/i,
            minWords: 3
        },
        german: {
            variants: ['de', 'de-de', 'de-at', 'de-ch', 'de-li', 'de-lu'],
            words: /\b(der|die|das|und|ist|mit|für|von|den|zu|nicht|eine|dass|wird|bei|sind|auch)\b/gi,
            chars: /[äöüßÄÖÜ]/,
            minWords: 2
        },
        french: {
            variants: ['fr', 'fr-fr', 'fr-be', 'fr-ca', 'fr-ch', 'fr-lu', 'fr-mc'],
            words: /\b(les|des|que|dans|est|pour|qui|vous|nous|sont|avec|mais|elle|tout|plus)\b/gi,
            chars: /[éèêëàâçîïôûùüÿœæÉÈÊËÀÂÇÎÏÔÛÙÜŸŒÆ]/,
            minWords: 2
        },
        spanish: {
            variants: ['es', 'es-es', 'es-mx', 'es-ar', 'es-bo', 'es-cl', 'es-co', 'es-cr', 'es-do', 'es-ec', 'es-gt', 'es-hn', 'es-ni', 'es-pa', 'es-pe', 'es-pr', 'es-py', 'es-sv', 'es-uy', 'es-ve'],
            words: /\b(que|los|las|por|una|para|con|del|está|tiene|pero|cuando|este|sobre|muy)\b/gi,
            chars: /[áéíóúñ¿¡ÁÉÍÓÚÑ]/,
            minWords: 2
        },
        portuguese: {
            variants: ['pt', 'pt-br', 'pt-pt', 'pt-ao', 'pt-cv', 'pt-gw', 'pt-mo', 'pt-mz', 'pt-st', 'pt-tl'],
            words: /\b(que|dos|das|não|como|uma|por|seu|quando|muito|nos|está|são|tem|mas)\b/gi,
            chars: /[áâãàçéêíóôõúÁÂÃÀÇÉÊÍÓÔÕÚ]/,
            minWords: 2
        },
        italian: {
            variants: ['it', 'it-it', 'it-ch', 'it-sm', 'it-va'],
            words: /\b(che|per|sono|questo|anche|come|alla|della|quello|tutti|loro|quando)\b/gi,
            chars: /[àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ]/,
            minWords: 2
        },
        russian: {
            variants: ['ru', 'ru-ru', 'ru-by', 'ru-kz', 'ru-kg'],
            words: /\b(это|как|все|для|вот|лет|еще|вас|они|вам|уже|чем|если|очень)\b/gi,
            chars: /[а-яёА-ЯЁ]/,
            minWords: 2
        },
        dutch: {
            variants: ['nl', 'nl-nl', 'nl-be', 'nl-aw', 'nl-cw', 'nl-sx'],
            words: /\b(het|een|dat|wat|van|met|bij|nog|maar|wel|toch|alleen|twee|hier)\b/gi,
            chars: /[äëïöüÄËÏÖÜ]/,
            minWords: 2
        },
        norwegian: {
            variants: ['no', 'nb', 'nn', 'no-no', 'nb-no', 'nn-no'],
            words: /\b(og|er|som|på|for|til|med|har|ikke|det|var|jeg|han|hun)\b/gi,
            chars: /[æøåÆØÅ]/,
            minWords: 2
        },
        japanese: {
            variants: ['ja', 'ja-jp'],
            words: /[はをがでのにへとかもる]/,
            chars: /[\u3040-\u309F\u30A0-\u30FF]/,
            minWords: 1
        },
        chinese: {
            variants: ['zh', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo'],
            words: /[的是不我有在人这个们]/,
            chars: /[\u4E00-\u9FFF]/,
            minWords: 1
        },
        korean: {
            variants: ['ko', 'ko-kr'],
            words: /[는은이가을를]/,
            chars: /[\uAC00-\uD7AF\u1100-\u11FF]/,
            minWords: 1
        },
        arabic: {
            variants: ['ar', 'ar-sa', 'ar-ae', 'ar-bh', 'ar-dz', 'ar-eg', 'ar-iq', 'ar-jo', 'ar-kw', 'ar-lb', 'ar-ly'],
            words: /[في|من|على|إلى|عن|مع|هذا|التي|هو]/,
            chars: /[\u0600-\u06FF]/,
            minWords: 1
        }
    };
    function getLanguageFamily(text) {
        if (!text || typeof text !== 'string' || text.trim().length === 0) {
            return 'unknown';
        }

        const scores = {};

        // Initialize scores
        for (const lang in languagePatterns) {
            scores[lang] = 0;
        }

        // Calculate scores for each language
        for (const [lang, pattern] of Object.entries(languagePatterns)) {
            // Check for special characters
            const charMatches = (text.match(pattern.chars) || []).length;
            scores[lang] += charMatches * 2;

            // Check for common words
            const wordMatches = (text.match(pattern.words) || []).length;
            scores[lang] += wordMatches * 3;

            // If minimum word threshold isn't met, reduce score
            if (wordMatches < pattern.minWords) {
                scores[lang] *= 0.5;
            }
        }

        // Special handling for English (if text is purely ASCII)
        if (/^[a-z\s.,!?'"-]+$/i.test(text)) {
            scores.english += 3;
        }

        // Special handling for CJK languages
        if (/[\u3040-\u30FF]/.test(text)) {
            scores.japanese += 5;
        }
        if (/[\u4E00-\u9FFF]/.test(text)) {
            scores.chinese += 5;
        }
        if (/[\uAC00-\uD7AF\u1100-\u11FF]/.test(text)) {
            scores.korean += 5;
        }

        // Find the language with the highest score
        let maxScore = 0;
        let detectedLang = 'unknown';

        for (const [lang, score] of Object.entries(scores)) {
            if (score > maxScore) {
                maxScore = score;
                detectedLang = lang;
            }
        }

        // Require a minimum confidence threshold
        return maxScore >= 2 ? detectedLang : 'unknown';
    }

    function getMetadataLanguage(metadataText) {
        const lowerText = metadataText.toLowerCase();

        // Check for specific language patterns in metadata
        for (const [lang, pattern] of Object.entries(languagePatterns)) {
            // Check all variants
            for (const variant of pattern.variants) {
                if (lowerText.includes(`[${variant}]`)) {
                    return lang;
                }
            }
            // Check for language name
            if (lowerText.includes(lang)) {
                return lang;
            }
        }

        return 'unknown';
    }

    function has8ContinuousChars(text, lang) {
        // Special handling for languages with different character sets
        switch(lang) {
            case 'chinese':
                return /[\u4E00-\u9FFF]{4,}/.test(text);
            case 'japanese':
                return /[\u3040-\u309F\u30A0-\u30FF]{4,}/.test(text);
            case 'korean':
                return /[\uAC00-\uD7AF\u1100-\u11FF]{4,}/.test(text);
            case 'arabic':
                return /[\u0600-\u06FF]{8,}/.test(text);
            case 'russian':
                return /[а-яёА-ЯЁ]{8,}/.test(text);
            case 'english':
                return /[a-zA-Z]{8,}/.test(text) &&
                       /\b(the|and|for|that|with)\b/i.test(text);
            case 'german':
                return /[a-zA-ZäöüßÄÖÜ]{8,}/.test(text) &&
                       /\b(der|die|das|und|ist)\b/i.test(text);
            case 'french':
                return /[a-zA-ZéèêëàâçîïôûùüÿœæÉÈÊËÀÂÇÎÏÔÛÙÜŸŒÆ]{8,}/.test(text) &&
                       /\b(les|des|que|dans|est)\b/i.test(text);
            case 'spanish':
                return /[a-zA-ZáéíóúñÁÉÍÓÚÑ]{8,}/.test(text) &&
                       /\b(que|los|las|por|una)\b/i.test(text);
            case 'portuguese':
                return /[a-zA-ZáâãàçéêíóôõúÁÂÃÀÇÉÊÍÓÔÕÚ]{8,}/.test(text) &&
                       /\b(que|dos|das|não|como)\b/i.test(text);
            case 'italian':
                return /[a-zA-ZàèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ]{8,}/.test(text) &&
                       /\b(che|per|sono|questo|anche)\b/i.test(text);
            case 'dutch':
                return /[a-zA-ZäëïöüÄËÏÖÜ]{8,}/.test(text) &&
                       /\b(het|een|dat|wat|van)\b/i.test(text);
            case 'norwegian':
                return /[a-zA-ZæøåÆØÅ]{8,}/.test(text) &&
                       /\b(og|er|som|på|for)\b/i.test(text);
            default:
                return false;
        }
    }

    function highlightSynopsisMismatch() {
        const languageHeader = Array.from(document.querySelectorAll('th')).find(th =>
            th.textContent.trim() === 'Title Metadata Language');
        if (!languageHeader) return;

        const languageCell = languageHeader.nextElementSibling;
        if (!languageCell) return;

        const metadataText = languageCell.textContent.trim();
        const metadataFamily = getMetadataLanguage(metadataText);

        const elements = document.querySelectorAll('th');
        elements.forEach(element => {
            const text = element.textContent.trim();

            if (text === 'Short Synopsis' || text === 'Long Synopsis') {
                const synopsisContent = element.nextElementSibling;
                if (!synopsisContent) return;

                const synopsisText = synopsisContent.textContent.trim();
                const synopsisFamily = getLanguageFamily(synopsisText);

                // Check for 8 continuous characters for all supported languages
                const has8CharsMatch = has8ContinuousChars(synopsisText, metadataFamily);

                // Only highlight if there's a clear mismatch between language families
                // and there's no 8 continuous character match
                if (synopsisFamily !== 'unknown' &&
                    metadataFamily !== 'unknown' &&
                    synopsisFamily !== metadataFamily &&
                    !has8CharsMatch) {

                    console.log('Language mismatch:', {
                        synopsis: synopsisFamily,
                        metadata: metadataFamily,
                        text: synopsisText.substring(0, 100),
                        has8CharsMatch: has8CharsMatch
                    });

                    synopsisContent.style.backgroundColor = '#ffeb3b';
                    synopsisContent.style.padding = '5px';
                    synopsisContent.title = `Detected: ${synopsisFamily.toUpperCase()}, Expected: ${metadataFamily.toUpperCase()}`;
                } else {
                    synopsisContent.style.backgroundColor = '';
                    synopsisContent.style.padding = '';
                    synopsisContent.title = '';
                }
            }
        });
    }

    function observeDOMChanges() {
        const observer = new MutationObserver((mutations) => {
            for (let mutation of mutations) {
                if (mutation.type === 'childList') {
                    highlightSynopsisMismatch();
                }
            }
        });

        observer.observe(document.body, { childList: true, subtree: true });
    }

    // Initialize the script
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            highlightSynopsisMismatch();
            observeDOMChanges();
        });
    } else {
        highlightSynopsisMismatch();
        observeDOMChanges();
    }
})();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址