开发者问题收集

将 90,000 封电子邮件拉取到 Google Sheet

2022-06-09
110

我使用了 https://webapps.stackexchange.com/questions/160232/gmail-bulk-emails-manipulation 并做了一些修改,将大约 90,000 封电子邮件(来自免费的 gmail 帐户)复制到工作表中。

该脚本仅工作一次。一个周期后,触发器变为“禁用”,并显示“原因不明”。

我减少了批次大小,增加了时间间隔,但仍然出现相同的错误。

我哪里做错了?


/**
 * Creates the first trigger to call batchArchiveEmail. 
 */
function init(){

    var Triggers = ScriptApp.getProjectTriggers();
    for (var p = 0; p < Triggers.length; p++) {
      ScriptApp.deleteTrigger(Triggers[p])
    }



   ScriptApp
    .newTrigger('batchArchiveEmail')
    .timeBased()
    .after(60 * 1000)
    .create();
    console.log(`trigger created`)
}

/**
 * Archive emails by batches preventing controlling limiting the execution time and  
 * creating a trigger if there are still threads pending to be archived.
 */ 
function batchArchiveEmail(){
  const start = Date.now();
  /** 
   * Own execution time limit for the search and archiving operations to prevent an 
   * uncatchable error. As the execution time check is done in do..while condition there  
   * should be enough time to one search and archive operation and to create a trigger 
   * to start a new execution. 
   */ 
  const maxTime = 3 * 60 * 1000; // Instead of 25 use 3 for Google free accounts
  const batchSize = 50;
  let threads, elapsedTime;
  var ss=SpreadsheetApp.getActive();
  var sheet=ss.getSheetByName("Sheet1");
  /** Search and archive threads, then repeat until the search returns 0 threads or the 
   * maxTime is reached
   */ 
  var ms=[];
  do {

    threads = GmailApp.search('label:inbox before:2022/5/1');
    for (let j = 0; j < threads.length; j += batchSize) {
      //below code added by me
      ms=[];
      var messages = threads[j].getMessages();
      for (var m = 0; m < messages.length; m++) {
        var from = messages[m].getFrom(); //from field
        var mId = messages[m].getId();//id field to create the link later
        var supportStats = [];
        var to = messages[m].getTo();//to field
        var time = messages[m].getDate();//date field
        var subject = messages[m].getSubject();//subject field
        var body=messages[m].getPlainBody();
        var tel=[];
        tel = body.match(/[\+]?\d{10}|\(\d{3}\)\s?-\d{6}|\d{3}\s-\d{3}\s\d{4}/);
        supportStats.push(from);
        supportStats.push(to);
        supportStats.push(time);
        supportStats.push(subject);
        supportStats.push('https://mail.google.com/mail/u/0/#inbox/'+mId); //build the URL to the email
        supportStats.push(body);
        if (tel){supportStats.push(tel[0])} else {supportStats.push("")};
        ms.push(supportStats);
      }
    var lr=sheet.getLastRow();
    sheet.getRange(lr+1,1,ms.length,7).setValues(ms);
    //above code added by me
    GmailApp.moveThreadsToArchive(threads.slice(j, j + batchSize));
    };
    /**
     * Used to prevent to have too many calls in a short time, might not be 
     * necessary with a large enough batchSize
     */
    Utilities.sleep(`2000`); 
    elapsedTime = Date.now() - start;
  } while (threads.length > 0 &&  elapsedTime < maxTime);
  if(threads.length > 0){
    /** Delete the last trigger */

    var Triggers = ScriptApp.getProjectTriggers();
    for (var p = 0; p < Triggers.length; p++) {
      ScriptApp.deleteTrigger(Triggers[p])
    }


    //deleteTriggers();

    /** Create a one-time new trigger */
    ScriptApp
    .newTrigger('batchArchiveEmail')
    .timeBased()
    .after(300 * 1000)
    .create();
    console.log(`next trigger created`)
  } else {
    /** Delete the last trigger */
    var Triggers = ScriptApp.getProjectTriggers();
    for (var p = 0; p < Triggers.length; p++) {
      ScriptApp.deleteTrigger(Triggers[p])
    }
    console.log(`No more threads to process`);
  }
}


3个回答

问题和解决方法:

当我测试您的脚本时,我确认了与您相同的情况。在这种情况下,不幸的是,即使增加下一次触发时间,也无法避免该问题。

在此回答中,我想介绍一种解决方法,我已经在 此线程 中发布过。不幸的是,我的回答对该线程没有用。但幸运的是,当我测试您的脚本时,我确认可以使用此解决方法。

关于 新触发器从未运行。 ,如果您在脚本编辑器中使用 V8 运行时并且安装的触发器不起作用,我担心这可能是错误。 Ref1 Ref2

不过,幸运的是,目前有一个解决方法可以解决这个错误。那就是使用 Web Apps。 Ref 我以前也遇到过同样的情况。此解决方法的流程如下。

  1. 运行要运行的脚本。
  2. 当安装了时间驱动触发器时,即使用 Web Apps 安装。
    • 这是重点。

在这种情况下,脚本安装的时间驱动触发器将运行。通过此解决方法,您的脚本可以在启用 V8 运行时的情况下工作。修改脚本后,它将变为以下形式。

用法:

在此解决方法中,使用 Web Apps。因此,请执行以下流程。

1. 部署 Web Apps。

  1. 在脚本编辑器上,在脚本编辑器的右上角,请单击“单击部署”->“新部署”。
  2. 请单击“选择类型”-> “Web App”。
  3. 请在“部署配置”下的字段中输入有关 Web App 的信息。
  4. 请为 “以...身份执行” 选择 “我”
  5. 请为 “谁有访问权限” 选择 “仅限我自己”
  6. 请点击“部署”按钮。然后点击“完成”按钮。
  7. 在脚本编辑器的右上角,请点击“单击部署”->“测试部署”。
  8. 请复制 Web Apps 的 URL。
    • 本例中URL类似 https://script.google.com/macros/s/###/dev

详细信息可参见 官方文档 我的报告

2. 修改脚本。

请按如下方式修改您的脚本。请将您的Web Apps URL设置为 webAppsUrl

const functionName = "batchArchiveEmail"; // This is the function name for executing.
const webAppsUrl = "https://script.google.com/macros/s/###/dev"; // Please set your Web Apps URL.

// When v8 runtime is used, when the trigger is set from the function executing by a trigger, the trigger is disabled. This is the recognized bug. But unfortunately, this has still not been resolved. (September 21, 2021)
// https://issuetracker.google.com/issues/150756612
// https://issuetracker.google.com/issues/142405165
const doGet = _ => {
  deleteTriggers();
  ScriptApp.newTrigger(functionName).timeBased().after(60000).create();
  return ContentService.createTextOutput();

  // DriveApp.getFiles(); // This is used for automatically detecting the scopes for requesting to Web Apps. Please don't remove this comment line.
};

const deleteTriggers = _ => {
  ScriptApp.getProjectTriggers().forEach(e => {
    if (e.getHandlerFunction() == functionName) {
      ScriptApp.deleteTrigger(e);
    }
  });
}

// Please run this function.
function init() {
  deleteTriggers();
  UrlFetchApp.fetch(webAppsUrl, { headers: { authorization: "Bearer " + ScriptApp.getOAuthToken() } });
  console.log(`trigger created`)
}

/**
 * Archive emails by batches preventing controlling limiting the execution time and  
 * creating a trigger if there are still threads pending to be archived.
 */
function batchArchiveEmail() {
  const start = Date.now();
  /** 
   * Own execution time limit for the search and archiving operations to prevent an 
   * uncatchable error. As the execution time check is done in do..while condition there  
   * should be enough time to one search and archive operation and to create a trigger 
   * to start a new execution. 
   */ 
  const maxTime = 3 * 60 * 1000; // Instead of 25 use 3 for Google free accounts
  const batchSize = 50;
  let threads, elapsedTime;
  var ss=SpreadsheetApp.getActive();
  var sheet=ss.getSheetByName("Sheet1");
  /** Search and archive threads, then repeat until the search returns 0 threads or the 
   * maxTime is reached
   */ 
  var ms=[];
  do {

    threads = GmailApp.search('label:inbox before:2022/5/1');
    for (let j = 0; j < threads.length; j += batchSize) {
      //below code added by me
      ms=[];
      var messages = threads[j].getMessages();
      for (var m = 0; m < messages.length; m++) {
        var from = messages[m].getFrom(); //from field
        var mId = messages[m].getId();//id field to create the link later
        var supportStats = [];
        var to = messages[m].getTo();//to field
        var time = messages[m].getDate();//date field
        var subject = messages[m].getSubject();//subject field
        var body=messages[m].getPlainBody();
        var tel=[];
        tel = body.match(/[\+]?\d{10}|\(\d{3}\)\s?-\d{6}|\d{3}\s-\d{3}\s\d{4}/);
        supportStats.push(from);
        supportStats.push(to);
        supportStats.push(time);
        supportStats.push(subject);
        supportStats.push('https://mail.google.com/mail/u/0/#inbox/'+mId); //build the URL to the email
        supportStats.push(body);
        if (tel){supportStats.push(tel[0])} else {supportStats.push("")};
        ms.push(supportStats);
      }
    var lr=sheet.getLastRow();
    sheet.getRange(lr+1,1,ms.length,7).setValues(ms);
    //above code added by me
    GmailApp.moveThreadsToArchive(threads.slice(j, j + batchSize));
    };
    /**
     * Used to prevent to have too many calls in a short time, might not be 
     * necessary with a large enough batchSize
     */
    Utilities.sleep(2000); 
    elapsedTime = Date.now() - start;
  } while (threads.length > 0 &&  elapsedTime < maxTime);

  if (threads.length > 0) {
    /** Delete the last trigger */
    deleteTriggers();

    /** Create a one-time new trigger */
    UrlFetchApp.fetch(webAppsUrl, { headers: { authorization: "Bearer " + ScriptApp.getOAuthToken() } });
    console.log(`next trigger created`)
  } else {
    /** Delete the last trigger */
    deleteTriggers();
    console.log(`No more threads to process`);
  }
}

3. 测试。

在修改后的脚本中,第一次运行时,请使用脚本编辑器运行 init() 。这样,您的脚本就会运行,并使用 Web Apps 安装时间驱动触发器。并且,已安装的触发器会由时间驱动触发器自动运行。

注意:

  • 在此修改后的脚本中,假设您的函数 batchArchiveEmail() 运行正常。请注意这一点。
  • 如果您禁用 V8 运行时,我认为脚本可能无需上述修改即可运行。但在这种情况下,循环的进程成本会变高。 参考 在此,我想介绍一下这个解决方法。

参考:

Tanaike
2022-06-10

此处 报告了 此触发器因未知原因被禁用 的问题,似乎与 V8 运行时 有关。点击 +1 让 Google 知道您也受到此问题的影响。

停用 项目设置 > 常规设置 > 启用 Chrome V8 运行时 似乎可以解决部分用户的问题。与将 appsscript.jsonruntimeVersion 从:

"runtimeVersion": "V8"

更改为

"runtimeVersion": "DEPRECATED_ES5"
相同
Emel
2022-06-09

我测试了问题中的代码,但做了一些小改动,因为我不想存档收件箱中的所有邮件。它工作正常。

更改只是在 threads = GmailApp.search('label:inbox before:2022/5/1'); 上添加了更具体的搜索查询。更改如下所示:

threads = GmailApp.search(`label:inbox before:2022/5/1 from:"Sender Display Name"`);

其中 “发件人显示名称” 是我在主要类别中收到的新闻稿的显示名称。

考虑到此问题中的其他答案(在撰写本文时, 1 2 )以及此处的其他相关问题和其他地方的帖子,看起来 Google 出现了问题,也许这会影响一些项目/用户。来自 评论 #199 ,发布日期:2022 年 6 月 7 日 06:49 AM CDT,

Hi,

To anyone affected by this, can you provide an affected script id?

Wicket
2022-06-10