利用Cloudflare邮件路由与Email Worker实现邮件推送

2025/12/17
posted in 技术分享
#Cloudflare #邮箱
views
实现验证码自动提取以及原邮件内容推送到Bark服务器

// ======== Configuration (the only section you need to edit) ========
// Bark device key. Required.
const BARK_KEY = "";
// Base URL of the Bark server, without a trailing slash. It is empty here and
// must be filled in (public Bark server or your self-hosted one) before deploying.
const BARK_SERVER = "";
// Every push URL is built as `${BARK_PUSH_URL}/<title>/<body>?<query>`.
const BARK_PUSH_URL = `${BARK_SERVER}/${BARK_KEY}`;
// Query string of the OTP push. It intentionally ends with "copy=": the email
// handler appends the extracted code right after it, so the code itself is the
// text Bark copies (`copy` = text to copy, `autoCopy=1` = copy automatically).
const BARK_ARG_OTP = "group=OTP&level=timeSensitive&autoCopy=1&copy=";
// Query string of the full-message push.
const BARK_ARG_FULL = "group=MAIL&level=passive";
// ====================================================================

// ---------- 辅助函数 ----------

/**
 * Decode a Quoted-Printable string (used for message bodies).
 *
 * Soft line breaks ("=" followed by CRLF or a bare LF) are removed, then every
 * run of consecutive "=XX" escapes is decoded as one byte sequence. The bytes
 * are interpreted as UTF-8 so multi-byte characters survive; if a run is not
 * valid UTF-8, each byte is mapped to the character with the same code
 * (Latin-1 style), which is what a byte-by-byte decoder would produce.
 *
 * @param {string} str - Quoted-Printable encoded text.
 * @returns {string} Decoded text; falsy input is returned unchanged.
 */
function decodeQuotedPrintable(str) {
  if (!str) return str;

  // Soft line breaks only exist to keep encoded lines short.
  const unfolded = str.replace(/=\r?\n/g, '');

  return unfolded.replace(/(?:=[A-Fa-f0-9]{2})+/g, (run) => {
    // "=E4=BD=A0" -> ["E4", "BD", "A0"] -> bytes
    const bytes = Uint8Array.from(run.slice(1).split('='), (hex) => parseInt(hex, 16));
    try {
      // fatal: throw instead of emitting U+FFFD so the fallback below can run.
      return new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(bytes);
    } catch (e) {
      return Array.from(bytes, (b) => String.fromCharCode(b)).join('');
    }
  });
}

/**
 * Decode Base64 text and interpret the resulting bytes as UTF-8.
 *
 * @param {string} base64Str - Base64 payload; CR/LF line breaks are allowed.
 * @returns {string} The decoded text.
 * @throws If `atob` rejects the input.
 */
function decodeBase64Utf8(base64Str) {
  const withoutLineBreaks = base64Str.replace(/\r?\n/g, '');

  // atob returns a "binary string": one character per decoded byte.
  const binaryString = atob(withoutLineBreaks);
  const byteCount = binaryString.length;
  const buffer = new Uint8Array(byteCount);
  for (let i = 0; i < byteCount; i += 1) {
    buffer[i] = binaryString.charCodeAt(i);
  }

  const utf8 = new TextDecoder('utf-8');
  return utf8.decode(buffer);
}

/**
 * Convert an HTML fragment to readable plain text.
 *
 * Removes comments, <style> and <script> blocks, turns <br> and common
 * block-level tags into newlines, strips the remaining tags, decodes numeric
 * character references plus a handful of named entities, and finally
 * normalises whitespace while keeping paragraph breaks.
 *
 * @param {string} html - HTML source.
 * @returns {string} Cleaned text; '' for falsy input.
 */
function htmlToCleanText(html) {
  if (!html) return '';

  // Numeric references may point above U+FFFF (e.g. emoji), which
  // String.fromCharCode would truncate. Out-of-range values are left as-is.
  const decodeReference = (original, codePoint) => (
    Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : original
  );

  // Remove comments, style, script
  let s = html.replace(/<!--[\s\S]*?-->/g, ' ');
  s = s.replace(/<style[\s\S]*?>[\s\S]*?<\/style>/gi, ' ');
  s = s.replace(/<script[\s\S]*?>[\s\S]*?<\/script>/gi, ' ');

  // Normalize <br>, <br/>, <br /> to newline
  s = s.replace(/<(br|br\/|br\s+\/)>/gi, '\n');

  // Closing and opening block-like tags -> newline
  s = s.replace(/<\/(p|div|tr|li|h[1-6]|table|tbody|thead|tfoot)>/gi, '\n');
  s = s.replace(/<(p|div|tr|li|h[1-6]|table|tbody|thead|tfoot)[^>]*>/gi, '\n');

  // Remove remaining tags
  s = s.replace(/<[^>]+>/g, ' ');

  // Decode numeric references (hex first, then decimal) and a few named entities
  s = s.replace(/&#x([0-9A-Fa-f]+);?/g, (m, hx) => decodeReference(m, parseInt(hx, 16)));
  s = s.replace(/&#([0-9]+);?/g, (m, d) => decodeReference(m, parseInt(d, 10)));
  s = s.replace(/&nbsp;|&amp;|&lt;|&gt;|&quot;|&apos;/g, (m) => {
    switch (m) {
      case '&nbsp;': return ' ';
      case '&amp;': return '&';
      case '&lt;': return '<';
      case '&gt;': return '>';
      case '&quot;': return '"';
      case '&apos;': return "'";
      default: return m;
    }
  });

  // Collapse repeated whitespace but keep newlines
  s = s.replace(/\r\n/g, '\n');
  s = s.replace(/[ \t]+\n/g, '\n');
  s = s.replace(/\n{3,}/g, '\n\n');
  s = s.replace(/[ \t]{2,}/g, ' ');
  s = s.trim();

  return s;
}

/**
 * Rewrite <a> elements as "label (href)" before the HTML is stripped, so that
 * button/link targets are not lost. An anchor without visible text becomes
 * just its href.
 *
 * @param {string} html - HTML source.
 * @returns {string} HTML with anchors replaced; falsy input is returned as-is.
 */
function preserveLinksForPlainText(html) {
  if (!html) {
    return html;
  }

  const anchorPattern = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi;

  const anchorToText = (_whole, url, inner) => {
    // Drop nested markup (spans, images, ...) to get the visible label.
    const visibleText = inner.replace(/<[^>]+>/g, '').trim();
    if (visibleText) {
      return `${visibleText} (${url})`;
    }
    return url;
  };

  return html.replace(anchorPattern, anchorToText);
}

/**
 * Heuristic: does this text look like a sizeable Base64 payload?
 *
 * True only when the text (ignoring line breaks and surrounding whitespace)
 * is at least 40 characters long, consists solely of Base64 alphabet
 * characters, "=" and whitespace, and ends with one or two "=" padding chars.
 *
 * @param {string} s - Text to inspect.
 * @returns {boolean}
 */
function looksLikeBase64(s) {
  if (!s) {
    return false;
  }

  const compact = s.replace(/\r?\n/g, '').trim();

  // Short strings are never treated as Base64.
  if (compact.length < 40) {
    return false;
  }

  // Character-set check runs on the original text (whitespace is allowed).
  if (!/^[A-Za-z0-9+/=\s]+$/.test(s)) {
    return false;
  }

  // Trailing padding is the deciding signal.
  return /={1,2}$/.test(compact);
}

// ---------- 解析 multipart / parts ----------

/**
 * Extract decoded, cleaned text from a single MIME part.
 *
 * The part is split into a header block and a body at the first blank line
 * (CRLF CRLF). Headers are read from the header block only, so header-like
 * text inside the body cannot influence the result. When the part declares a
 * Content-Transfer-Encoding, that declaration decides how the body is decoded;
 * the Base64 / Quoted-Printable heuristics are used only when nothing is
 * declared. (Running the "=XX" heuristic on a declared 7bit/8bit body would
 * corrupt ordinary text such as "token=AB1234".)
 *
 * @param {string} part - One chunk obtained by splitting the message on its boundary.
 * @returns {{type: 'text'|'html'|'unknown', text: string}}
 *   'text' for text/plain, 'html' for text/html (already converted to plain
 *   text), 'unknown' for anything else (decoded body, trimmed).
 */
function parsePartText(part) {
  // Separate headers from body. Without a blank line the whole part is used
  // for both header lookup and body.
  const splitIndex = part.search(/\r\n\r\n/);
  const hasBlankLine = splitIndex >= 0;
  const headers = hasBlankLine ? part.slice(0, splitIndex) : part;
  let body = hasBlankLine ? part.slice(splitIndex + 4) : part;
  body = body.replace(/\r\n$/, '');

  // Media type: the header value up to the first ";" or end of line.
  const ctMatch = headers.match(/Content-Type:\s*([^;\r\n]+)/i);
  const ctype = ctMatch ? ctMatch[1].toLowerCase().trim() : null;

  // Declared transfer encoding wins; heuristics only when none is declared.
  const cteMatch = headers.match(/Content-Transfer-Encoding:\s*([^\s;]+)/i);
  const declaredEncoding = cteMatch ? cteMatch[1].toLowerCase() : null;
  let isBase64;
  let isQP;
  if (declaredEncoding) {
    isBase64 = declaredEncoding === 'base64';
    isQP = declaredEncoding === 'quoted-printable';
  } else {
    isBase64 = looksLikeBase64(body);
    isQP = /=([0-9A-F]{2})/i.test(body);
  }

  try {
    if (isBase64) {
      body = decodeBase64Utf8(body);
    } else if (isQP) {
      body = decodeQuotedPrintable(body);
    }
  } catch (e) {
    // Best effort: keep the undecoded body if decoding fails.
    console.warn('decode failed for part', e);
  }

  if (ctype && /text\/plain/i.test(ctype)) {
    // Normalise CRLF -> LF and keep paragraph breaks.
    body = body.replace(/\r\n/g, '\n');
    body = body.replace(/\n{3,}/g, '\n\n');
    body = body.trim();
    return { type: 'text', text: body };
  }

  if (ctype && /text\/html/i.test(ctype)) {
    const cleaned = htmlToCleanText(body);
    return { type: 'html', text: cleaned };
  }

  // Anything else: return the decoded body, marked as unknown.
  body = body.replace(/\r\n/g, '\n').trim();
  return { type: 'unknown', text: body };
}

/**
 * Collect candidate text bodies from a raw MIME message, preferring text/plain.
 *
 * Multipart message (a `boundary=` parameter is present): every part is parsed
 * once; all text/plain bodies are returned if there are any, otherwise all
 * cleaned text/html bodies (possibly an empty array).
 *
 * Message without a boundary: the first text/plain body, else the first
 * text/html body (cleaned), else everything after the first blank line
 * (decoded heuristically and cleaned). Each case returns a one-element array.
 *
 * @param {string} raw - Raw message source.
 * @returns {string[]} Candidate texts; [] when nothing usable is found.
 */
function collectCandidateParts(raw) {
  // The boundary may be quoted or bare; a bare value ends at whitespace or ";"
  // so a following parameter is not swallowed into the boundary.
  const boundaryMatch = raw.match(/boundary=(?:"([^"\r\n]+)"|([^\s";]+))/i);

  if (boundaryMatch) {
    const boundary = boundaryMatch[1] || boundaryMatch[2];
    const plainTexts = [];
    const htmlTexts = [];

    for (const chunk of raw.split(`--${boundary}`)) {
      // Skip empty chunks and whatever follows the closing "--boundary--".
      if (!chunk || /^\s*--/.test(chunk)) continue;
      const parsed = parsePartText(chunk);
      if (!parsed) continue;
      if (parsed.type === 'text') {
        plainTexts.push(parsed.text);
      } else if (parsed.type === 'html') {
        htmlTexts.push(parsed.text);
      }
    }

    return plainTexts.length > 0 ? plainTexts : htmlTexts;
  }

  // Base64 decoding is best effort: on failure the body is kept as-is.
  const tryBase64 = (body) => {
    try {
      return decodeBase64Utf8(body);
    } catch (e) {
      console.warn('base64 decode failed, keeping body undecoded', e);
      return body;
    }
  };

  // Single-part message: the transfer encoding is declared in the message headers.
  const declaredBase64 = /Content-Transfer-Encoding:\s*base64/i.test(raw);
  const declaredQP = /Content-Transfer-Encoding:\s*quoted-printable/i.test(raw);
  const decodeDeclared = (body) => {
    if (looksLikeBase64(body) || declaredBase64) return tryBase64(body);
    return declaredQP ? decodeQuotedPrintable(body) : body;
  };

  const plainMatch = raw.match(/Content-Type:\s*text\/plain[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (plainMatch) {
    return [decodeDeclared(plainMatch[1]).replace(/\r\n/g, '\n').trim()];
  }

  const htmlMatch = raw.match(/Content-Type:\s*text\/html[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (htmlMatch) {
    return [htmlToCleanText(decodeDeclared(htmlMatch[1]))];
  }

  // Final fallback: everything after the first blank line, decoded by heuristics only.
  const wholeBody = raw.split(/\r\n\r\n/).slice(1).join('\r\n\r\n').trim();
  if (!wholeBody) return [];

  let body = wholeBody;
  if (looksLikeBase64(body)) {
    body = tryBase64(body);
  } else if (/=([0-9A-F]{2})/i.test(body)) {
    body = decodeQuotedPrintable(body);
  }
  body = body.replace(/\r\n/g, '\n').trim();
  // The body may still be HTML; strip style/script/tags if present.
  return [htmlToCleanText(body)];
}

// ---------- 验证码提取逻辑 ----------

/**
 * Extract a verification code from candidate texts.
 *
 * Strategies, in priority order (each one scans all candidates before the
 * next is tried):
 *   1. keyword followed (within 50 characters) by a token that contains a digit
 *   2. a standalone run of 4-12 digits
 *   3. an alphanumeric token (hyphens allowed inside) found where the
 *      surrounding non-space run contains a digit
 *   4. a token with at least two hyphens, 4-20 characters without the hyphens,
 *      containing a digit
 *   5. last resort: keyword followed by any 4+ character token (letter-only codes)
 *
 * Step 1 requires a digit so that ordinary words after the keyword
 * ("verification code for GitHub is 123456") are not mistaken for the code.
 * Tokens that look like CSS are rejected in steps 1, 3, 4 and 5.
 *
 * @param {string[]} candidates - Candidate texts; non-string entries are ignored.
 * @returns {string|null} The code, or null when none is recognised.
 */
function extractCodeFromCandidates(candidates) {
  if (!Array.isArray(candidates)) return null;

  const texts = candidates.filter((c) => typeof c === 'string');

  // Common CSS words, to avoid accidental matches
  const cssWords = ['margin', 'padding', 'width', 'height', 'color', 'border', 'background', 'font', 'display', 'table', 'solid', 'px'];

  const codeKeywords = [
    "验证码", "校验码", "动态码", "一次性密码", "一次性代码",
    "验证代码", "安全码", "认证码", "提取码",
    "verification code", "verify code",
    "one-time password", "one time password",
    "security code", "passcode", "OTP"
  ];
  const keywordGroup = `(?:${codeKeywords.join("|")})`;
  const tokenPattern = '([0-9A-Za-z][-0-9A-Za-z]{3,20})';

  // The lookahead demands a digit within the (at most 21 character) token.
  const keywordDigitRegex = new RegExp(`${keywordGroup}[\\s\\S]{0,50}?(?=[-0-9A-Za-z]{0,20}\\d)${tokenPattern}`, 'i');
  const keywordAnyRegex = new RegExp(`${keywordGroup}[\\s\\S]{0,50}?${tokenPattern}`, 'i');

  // Pure digits, 4-12 long
  const digitsRegex = /\b(\d{4,12})\b/;

  // Alphanumeric token with up to four hyphenated segments, digit nearby
  const mixedRegex = /(?=\S*\d)\b([0-9A-Za-z]+(?:[-][0-9A-Za-z]+){0,4})\b/;

  // At least two hyphens (e.g. a-b-c)
  const multiHyphenRegex = /\b([0-9A-Za-z]+(?:-[0-9A-Za-z]+){2,})\b/;

  // Filter tokens that are obviously CSS
  function looksLikeCssToken(tok) {
    if (!tok) return false;
    const low = tok.toLowerCase();
    if (/\b(?:[a-z\-]+:|;|\{|\})/.test(tok)) return true; // CSS punctuation
    if (cssWords.some((w) => low.includes(w))) return true;
    if (/[A-Za-z\-]+\(.*\)/.test(tok)) return true; // rgb(...), url(...), ...
    return false;
  }

  // First candidate whose first match of `regex` passes `accept`
  function firstMatch(regex, accept) {
    for (const text of texts) {
      const m = text.match(regex);
      if (m && accept(m[1])) return m[1];
    }
    return null;
  }

  const notCss = (tok) => !looksLikeCssToken(tok);

  return (
    // 1) keyword + token containing a digit
    firstMatch(keywordDigitRegex, notCss)
    // 2) plain digits
    ?? firstMatch(digitsRegex, () => true)
    // 3) mixed token (must not end with a hyphen)
    ?? firstMatch(mixedRegex, (tok) => !/^.*-$/.test(tok) && notCss(tok))
    // 4) multi-hyphen token with a sane length once hyphens are removed
    ?? firstMatch(multiHyphenRegex, (tok) => {
      const pure = tok.replace(/-/g, '');
      return pure.length >= 4 && pure.length <= 20 && /\d/.test(pure) && notCss(tok);
    })
    // 5) keyword + letter-only token
    ?? firstMatch(keywordAnyRegex, notCss)
  );
}

// ---------- Worker 入口 ----------
/**
 * Decode a body using heuristics only: Base64 if it looks like Base64,
 * otherwise Quoted-Printable if it contains an "=XX" escape. A failed Base64
 * decode keeps the body unchanged.
 */
function decodeBodyHeuristically(body) {
  if (looksLikeBase64(body)) {
    try {
      return decodeBase64Utf8(body);
    } catch (err) {
      console.warn('base64 decode failed, keeping body undecoded', err);
      return body;
    }
  }
  return /=([0-9A-F]{2})/i.test(body) ? decodeQuotedPrintable(body) : body;
}

/** Everything after the first blank line (CRLF CRLF) of the raw message, trimmed. */
function bodyAfterHeaders(raw) {
  return raw.split(/\r\n\r\n/).slice(1).join('\r\n\r\n').trim();
}

/**
 * encodeURIComponent throws URIError on lone surrogates, which truncation or
 * bad numeric entities can leave behind. Replace them with U+FFFD first.
 */
function encodePathSegment(text) {
  const wellFormed = text.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    '\uFFFD'
  );
  return encodeURIComponent(wellFormed);
}

/**
 * Build the text shown in the "full message" push: text/plain parts when
 * present, otherwise HTML parts converted to text with their links preserved.
 */
function extractOriginalText(raw) {
  const boundaryMatch = raw.match(/boundary=(?:"([^"\r\n]+)"|([^\s";]+))/i);

  if (boundaryMatch) {
    const boundary = boundaryMatch[1] || boundaryMatch[2];
    // Drop empty chunks and whatever follows the closing "--boundary--".
    const chunks = raw.split(`--${boundary}`).filter((ch) => ch && !/^\s*--/.test(ch));
    const hasType = (chunk, pattern) => {
      const ct = chunk.match(/Content-Type:\s*([^;]+)/i);
      return Boolean(ct) && pattern.test(ct[1]);
    };

    // Empty text/plain parts are skipped so they cannot hide a usable HTML part.
    const plainParts = [];
    for (const ch of chunks) {
      if (!hasType(ch, /text\/plain/i)) continue;
      const parsed = parsePartText(ch);
      if (parsed && parsed.text) plainParts.push(parsed.text);
    }
    if (plainParts.length > 0) return plainParts.join('\n\n');

    const htmlParts = [];
    for (const ch of chunks) {
      if (!hasType(ch, /text\/html/i)) continue;
      const splitIndex = ch.search(/\r\n\r\n/);
      let body = splitIndex >= 0 ? ch.slice(splitIndex + 4) : ch;

      if (/Content-Transfer-Encoding:\s*base64/i.test(ch)) {
        try {
          body = decodeBase64Utf8(body);
        } catch (err) {
          console.warn('base64 decode failed, keeping body undecoded', err);
        }
      } else if (/quoted-printable/i.test(ch)) {
        body = decodeQuotedPrintable(body);
      }

      // Keep link targets before the tags are stripped.
      const cleaned = htmlToCleanText(preserveLinksForPlainText(body));
      if (cleaned) htmlParts.push(cleaned);
    }
    return htmlParts.join('\n\n');
  }

  // No boundary: text/plain body, then text/html body, then the whole body.
  const plainMatch = raw.match(/Content-Type:\s*text\/plain[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (plainMatch) {
    return decodeBodyHeuristically(plainMatch[1]).replace(/\r\n/g, '\n').trim();
  }

  const htmlMatch = raw.match(/Content-Type:\s*text\/html[\s\S]*?\r\n\r\n([\s\S]+)/i);
  if (htmlMatch) {
    return htmlToCleanText(preserveLinksForPlainText(decodeBodyHeuristically(htmlMatch[1])));
  }

  const whole = bodyAfterHeaders(raw);
  return whole ? htmlToCleanText(decodeBodyHeuristically(whole)) : '';
}

/**
 * Send one push request to Bark. Failures (network error or non-2xx status)
 * are logged and never thrown, so one failed push cannot suppress another.
 */
async function sendBarkPush(url, label) {
  try {
    const res = await fetch(url);
    if (!res.ok) console.error(`Bark ${label} push failed: HTTP ${res.status}`);
  } catch (err) {
    console.error(`Bark ${label} push failed:`, err);
  }
}

export default {
  /**
   * Email Worker handler. For each incoming message it sends two Bark pushes:
   *   1. OTP push: title = sender, body = recipient + extracted code
   *   2. full push: title = sender, body = recipient + message text (first 5000 chars)
   * Errors are logged and swallowed; the handler never throws.
   */
  async email(message, env, ctx) {
    try {
      const raw = await new Response(message.raw).text();

      const from = message.headers.get('from') || '(未知发件人)';
      const to = message.headers.get('to') || '(未知收件人)';

      // Candidate texts for code extraction (text/plain preferred); fall back
      // to the cleaned whole body when nothing was collected.
      let candidates = collectCandidateParts(raw);
      if (!candidates || candidates.length === 0) {
        const wholeBody = bodyAfterHeaders(raw);
        candidates = wholeBody ? [htmlToCleanText(decodeBodyHeuristically(wholeBody))] : [];
      }

      const code = extractCodeFromCandidates(candidates);
      console.log('提取到的验证码：', code || '(未识别)');

      // 1) OTP push; the code is appended to the end of BARK_ARG_OTP.
      const bodyOtp = code ? `收件人：${to}\n验证码：${code}` : `收件人：${to}\n未识别验证码`;
      const pushUrlOtp = `${BARK_PUSH_URL}/${encodePathSegment(from)}/${encodePathSegment(bodyOtp)}?${BARK_ARG_OTP}${encodeURIComponent(code || '')}`;
      await sendBarkPush(pushUrlOtp, 'OTP');

      // 2) Full-message push: normalise newlines, keep paragraph breaks, truncate.
      const originalText = extractOriginalText(raw)
        .replace(/\r\n/g, '\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
      const rawText = originalText.slice(0, 5000);

      const bodyFull = `收件人：${to}\n邮件内容（截断5000字符）：\n${rawText}`;
      const pushUrlFull = `${BARK_PUSH_URL}/${encodePathSegment(from)}/${encodePathSegment(bodyFull)}?${BARK_ARG_FULL}`;
      await sendBarkPush(pushUrlFull, 'MAIL');
    } catch (err) {
      console.error('Email Worker 错误：', err);
    }
  }
};
反虚拟化检测虚机(QEMU/virsh) »
利用Cloudflare邮件路由与Email Worker实现邮件推送

huzheyi's BLOG

Categories

Tags

Recent Posts