Source: lib/text/vtt_text_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.VttTextParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.media.ManifestParser');
  10. goog.require('shaka.text.Cue');
  11. goog.require('shaka.text.CueRegion');
  12. goog.require('shaka.text.TextEngine');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.StringUtils');
  15. goog.require('shaka.util.TextParser');
  /**
   * Text parser for WebVTT content.  Converts a UTF-8 encoded WebVTT document
   * (or segment) into a list of shaka.text.Cue objects, applying STYLE blocks,
   * header-declared regions and per-cue settings along the way.
   *
   * @implements {shaka.extern.TextParser}
   * @export
   */
  shaka.text.VttTextParser = class {
    /** Constructs a VTT parser. */
    constructor() {
      /**
       * When true (and the manifest is HLS), X-TIMESTAMP-MAP headers are
       * resolved with the rollover-aware sequence-mode computation.
       * @private {boolean}
       */
      this.sequenceMode_ = false;

      /**
       * Manifest type as reported through setManifestType().
       * @private {string}
       */
      this.manifestType_ = shaka.media.ManifestParser.UNKNOWN;
    }

    /**
     * VTT has no init segments, so this must never be called.
     *
     * @override
     * @export
     */
    parseInit(data) {
      goog.asserts.assert(false, 'VTT does not have init segments');
    }

    /**
     * @override
     * @export
     */
    setSequenceMode(sequenceMode) {
      this.sequenceMode_ = sequenceMode;
    }

    /**
     * @override
     * @export
     */
    setManifestType(manifestType) {
      this.manifestType_ = manifestType;
    }

    /**
     * Parses a WebVTT document into cues.
     *
     * Throws a CRITICAL/TEXT shaka.util.Error with code INVALID_TEXT_HEADER
     * when the first block does not carry the WEBVTT signature (or, in HLS
     * sequence mode, when the X-TIMESTAMP-MAP header is malformed).
     *
     * @override
     * @export
     */
    parseMedia(data, time) {
      const VttTextParser = shaka.text.VttTextParser;

      // Get the input as a string.  Normalize newlines to \n.
      let str = shaka.util.StringUtils.fromUTF8(data);
      str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');

      // Blocks are separated by one or more blank lines.  The first block is
      // the file header.
      const blocks = str.split(/\n{2,}/m);
      const header = blocks[0];

      if (!/^WEBVTT($|[ \t\n])/m.test(header)) {
        throw new shaka.util.Error(
            shaka.util.Error.Severity.CRITICAL,
            shaka.util.Error.Category.TEXT,
            shaka.util.Error.Code.INVALID_TEXT_HEADER);
      }

      // Depending on "segmentRelativeVttTiming" configuration,
      // "vttOffset" will correspond to either "periodStart" (default)
      // or "segmentStart", for segmented VTT where timings are relative
      // to the beginning of each segment.
      // NOTE: "periodStart" is the timestamp offset applied via TextEngine.
      // It is no longer closely tied to periods, but the name stuck around.
      // NOTE: This offset and the flag choosing its meaning have no effect on
      // HLS content, which should use X-TIMESTAMP-MAP and periodStart instead.
      let offset = time.vttOffset;

      // Only use 'X-TIMESTAMP-MAP' with HLS.  This overrides offset above.
      if (header.includes('X-TIMESTAMP-MAP') &&
          this.manifestType_ == shaka.media.ManifestParser.HLS) {
        if (this.sequenceMode_) {
          // Compute a different, rollover-based offset for sequence mode.
          offset = this.computeHlsSequenceModeOffset_(header, time);
        } else {
          // Calculate the offset from the segment startTime.
          offset = time.segmentStart;
        }
      }

      // Parse VTT regions declared as "Region:" lines in the header block.
      /** @type {!Array.<!shaka.text.CueRegion>} */
      const regions = [];
      for (const line of header.split('\n')) {
        if (/^Region:/.test(line)) {
          regions.push(VttTextParser.parseRegion_(line));
        }
      }

      /** @type {!Map.<string, !shaka.text.Cue>} */
      const styles = new Map();
      shaka.text.Cue.addDefaultTextColor(styles);

      // Parse cues.  Every block is offered to the style parser first, so
      // that STYLE blocks affect the cues that follow them.
      const ret = [];
      for (const block of blocks.slice(1)) {
        const lines = block.split('\n');
        VttTextParser.parseStyle_(lines, styles);
        const cue = VttTextParser.parseCue_(lines, offset, regions, styles);
        if (cue) {
          ret.push(cue);
        }
      }

      return ret;
    }

    /**
     * Computes the cue time offset for HLS sequence mode from the
     * X-TIMESTAMP-MAP header.
     *
     * @param {string} headerBlock Contains X-TIMESTAMP-MAP.
     * @param {shaka.extern.TextParser.TimeContext} time
     * @return {number} The offset, in seconds, to add to every cue time.
     * @throws {shaka.util.Error} INVALID_TEXT_HEADER if the LOCAL or MPEGTS
     *   attribute is missing or the LOCAL time cannot be parsed.
     * @private
     */
    computeHlsSequenceModeOffset_(headerBlock, time) {
      // https://bit.ly/2K92l7y
      // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
      // the rest of the media.
      // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
      // (the attributes can go in any order)
      // where n is MPEG-2 time and m is cue time it maps to.
      // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
      // means an offset of 10 seconds
      // 900000/MPEG_TIMESCALE - cue time.
      const VttTextParser = shaka.text.VttTextParser;

      const invalidHeader = () => new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_TEXT_HEADER);

      const cueTimeMatch = headerBlock.match(
          /LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
      const mpegTimeMatch = headerBlock.match(/MPEGTS:(\d+)/m);
      if (!cueTimeMatch || !mpegTimeMatch) {
        throw invalidHeader();
      }

      const cueTime = shaka.util.TextParser.parseTime(cueTimeMatch[1]);
      if (cueTime == null) {
        throw invalidHeader();
      }

      let mpegTime = Number(mpegTimeMatch[1]);
      const mpegTimescale = VttTextParser.MPEG_TIMESCALE_;
      const rolloverSeconds = VttTextParser.TS_ROLLOVER_ / mpegTimescale;

      // For every full rollover period the segment start lies beyond the
      // period start, add one rollover's worth of ticks back to the MPEG time.
      let segmentStart = time.segmentStart - time.periodStart;
      while (segmentStart >= rolloverSeconds) {
        segmentStart -= rolloverSeconds;
        mpegTime += VttTextParser.TS_ROLLOVER_;
      }

      return time.periodStart + mpegTime / mpegTimescale - cueTime;
    }

    /**
     * Parses a header "Region:" line into a CueRegion object.  Settings that
     * are not recognized are logged and ignored.
     *
     * @param {string} text
     * @return {!shaka.text.CueRegion}
     * @private
     */
    static parseRegion_(text) {
      const VttTextParser = shaka.text.VttTextParser;
      const parser = new shaka.util.TextParser(text);
      // The region string looks like this:
      // Region: id=fred width=50% lines=3 regionanchor=0%,100%
      //         viewportanchor=10%,90% scroll=up
      const region = new shaka.text.CueRegion();

      // Skip 'Region:'
      parser.readWord();
      parser.skipWhitespace();

      let word = parser.readWord();
      while (word) {
        if (!VttTextParser.parseRegionSetting_(region, word)) {
          shaka.log.warning(
              'VTT parser encountered an invalid VTTRegion setting: ', word,
              ' The setting will be ignored.');
        }
        parser.skipWhitespace();
        word = parser.readWord();
      }

      return region;
    }

    /**
     * Parses a STYLE block and merges the styles it declares into |styles|.
     * Blocks that are empty, comments, or anything other than STYLE are
     * ignored.  Each "::cue" rule is stored under its selector (the text
     * between the parentheses), or under 'global' when it has no selector.
     *
     * @param {!Array.<string>} text The lines of one block.
     * @param {!Map.<string, !shaka.text.Cue>} styles Updated in place.
     * @private
     */
    static parseStyle_(text, styles) {
      // Skip empty blocks.
      if (text.length == 1 && !text[0]) {
        return;
      }

      // Skip comment blocks.
      if (/^NOTE($|[ \t])/.test(text[0])) {
        return;
      }

      // Only style block are allowed.
      if (text[0] != 'STYLE') {
        return;
      }

      // Group the lines into rules: a rule starts on a line containing
      // '::cue' and ends on the first line containing '}'.
      /** @type {!Array.<!Array.<string>>} */
      const styleBlocks = [];
      let lastBlockIndex = -1;
      for (let i = 1; i < text.length; i++) {
        if (text[i].includes('::cue')) {
          styleBlocks.push([]);
          lastBlockIndex = styleBlocks.length - 1;
        }
        if (lastBlockIndex == -1) {
          continue;
        }
        styleBlocks[lastBlockIndex].push(text[i]);
        if (text[i].includes('}')) {
          lastBlockIndex = -1;
        }
      }

      for (const styleBlock of styleBlocks) {
        const firstLine = styleBlock[0];

        let styleSelector = 'global';
        // Look for what is within parentheses. For example:
        // <code>::cue(b) {</code>, what we are looking for is <code>b</code>.
        // Only the text before the opening brace is searched, so that
        // parentheses inside a single-line declaration (e.g. "rgb(...)") are
        // not mistaken for part of the selector.
        const braceIndex = firstLine.indexOf('{');
        const selectorText =
            braceIndex == -1 ? firstLine : firstLine.slice(0, braceIndex);
        const selector = selectorText.match(/\((.*)\)/);
        if (selector) {
          styleSelector = selector[1];
        }

        // We start at 1 to avoid '::cue' and end earlier to avoid '}'
        let propertyLines = styleBlock.slice(1, -1);
        if (firstLine.includes('}')) {
          // Single-line rule: the declarations sit between the braces.
          const payload = /\{(.*?)\}/.exec(firstLine);
          if (payload) {
            propertyLines = payload[1].split(';');
          }
        }

        // Continue styles over multiple selectors if necessary.
        // For example,
        // ::cue(b) { background: white; } ::cue(b) { color: blue; }
        // should set both the background and foreground of bold tags.
        let cue = styles.get(styleSelector);
        if (!cue) {
          cue = new shaka.text.Cue(0, 0, '');
        }

        let validStyle = false;
        for (const propertyLine of propertyLines) {
          // We look for CSS properties. As a general rule they are separated
          // by <code>:</code>. Eg: <code>color: red;</code>
          const lineParts = /^\s*([^:]+):\s*(.*)/.exec(propertyLine);
          if (!lineParts) {
            continue;
          }
          const name = lineParts[1].trim();
          const value = lineParts[2].trim().replace(';', '');
          switch (name) {
            case 'background-color':
            case 'background':
              validStyle = true;
              cue.backgroundColor = value;
              break;
            case 'color':
              validStyle = true;
              cue.color = value;
              break;
            case 'font-family':
              validStyle = true;
              cue.fontFamily = value;
              break;
            case 'font-size':
              validStyle = true;
              cue.fontSize = value;
              break;
            case 'font-weight':
              if (parseInt(value, 10) >= 700 || value == 'bold') {
                validStyle = true;
                cue.fontWeight = shaka.text.Cue.fontWeight.BOLD;
              }
              break;
            case 'font-style':
              switch (value) {
                case 'normal':
                  validStyle = true;
                  cue.fontStyle = shaka.text.Cue.fontStyle.NORMAL;
                  break;
                case 'italic':
                  validStyle = true;
                  cue.fontStyle = shaka.text.Cue.fontStyle.ITALIC;
                  break;
                case 'oblique':
                  validStyle = true;
                  cue.fontStyle = shaka.text.Cue.fontStyle.OBLIQUE;
                  break;
              }
              break;
            case 'opacity':
              validStyle = true;
              cue.opacity = parseFloat(value);
              break;
            case 'text-combine-upright':
              validStyle = true;
              cue.textCombineUpright = value;
              break;
            case 'text-shadow':
              validStyle = true;
              cue.textShadow = value;
              break;
            case 'white-space':
              validStyle = true;
              // The CSS keyword is "nowrap" and CSS keywords are
              // case-insensitive, so compare in lower case.  This also keeps
              // accepting the "noWrap" spelling.
              cue.wrapLine = value.toLowerCase() != 'nowrap';
              break;
            default:
              shaka.log.warning('VTT parser encountered an unsupported style: ',
                  lineParts);
              break;
          }
        }

        // Only store the style if at least one property was understood.
        if (validStyle) {
          styles.set(styleSelector, cue);
        }
      }
    }

    /**
     * Parses a text block into a Cue object.
     *
     * Returns null for empty blocks, NOTE/STYLE/REGION blocks, blocks that
     * have no timing line, and blocks whose timing line cannot be parsed.
     * The input array is not modified.
     *
     * @param {!Array.<string>} text The lines of one block.
     * @param {number} timeOffset Added to the cue's start and end times.
     * @param {!Array.<!shaka.text.CueRegion>} regions
     * @param {!Map.<string, !shaka.text.Cue>} styles
     * @return {shaka.text.Cue}
     * @private
     */
    static parseCue_(text, timeOffset, regions, styles) {
      const VttTextParser = shaka.text.VttTextParser;

      // Skip empty blocks.
      if (text.length == 1 && !text[0]) {
        return null;
      }

      // Skip comment blocks.
      if (/^NOTE($|[ \t])/.test(text[0])) {
        return null;
      }

      // Skip style and region blocks.
      if (text[0] == 'STYLE' || text[0] == 'REGION') {
        return null;
      }

      // A first line without '-->' is the optional cue identifier.
      let id = null;
      let lines = text;
      if (!text[0].includes('-->')) {
        id = text[0];
        lines = text.slice(1);
      }

      // A block consisting solely of an identifier has no timing line.
      // Skip it instead of handing an undefined string to the time parser.
      if (!lines.length) {
        shaka.log.alwaysWarn(
            'VTT cue block has no timing line. Cue skipped:', id);
        return null;
      }

      // Parse the times.
      const parser = new shaka.util.TextParser(lines[0]);
      let start = parser.parseTime();
      const expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
      let end = parser.parseTime();

      if (start == null || expect == null || end == null) {
        shaka.log.alwaysWarn(
            'Failed to parse VTT time code. Cue skipped:', id, lines);
        return null;
      }

      start += timeOffset;
      end += timeOffset;

      // Get the payload.
      const payload = lines.slice(1).join('\n').trim();

      // Start from a copy of the global style, if one was declared.
      let cue = null;
      if (styles.has('global')) {
        cue = styles.get('global').clone();
        cue.startTime = start;
        cue.endTime = end;
        cue.payload = payload;
      } else {
        cue = new shaka.text.Cue(start, end, payload);
      }

      // Parse optional settings, which follow the end time on the same line.
      parser.skipWhitespace();
      let word = parser.readWord();
      while (word) {
        if (!VttTextParser.parseCueSetting(cue, word, regions)) {
          shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
              word,
              ' The setting will be ignored.');
        }
        parser.skipWhitespace();
        word = parser.readWord();
      }

      shaka.text.Cue.parseCuePayload(cue, styles);

      if (id != null) {
        cue.id = id;
      }
      return cue;
    }

    /**
     * Parses a WebVTT setting from the given word and applies it to the cue.
     *
     * A "region:" setting naming an unknown region is reported as success,
     * but leaves the cue's region untouched.
     *
     * @param {!shaka.text.Cue} cue
     * @param {string} word
     * @param {!Array.<!shaka.text.CueRegion>} regions
     * @return {boolean} True on success.
     */
    static parseCueSetting(cue, word, regions) {
      const VttTextParser = shaka.text.VttTextParser;
      let results = null;
      if ((results = /^align:(start|middle|center|end|left|right)$/.exec(word))) {
        VttTextParser.setTextAlign_(cue, results[1]);
      } else if ((results = /^vertical:(lr|rl)$/.exec(word))) {
        VttTextParser.setVerticalWritingMode_(cue, results[1]);
      } else if ((results = /^size:([\d.]+)%$/.exec(word))) {
        cue.size = Number(results[1]);
      } else if ((results =
          // eslint-disable-next-line max-len
          /^position:([\d.]+)%(?:,(line-left|line-right|middle|center|start|end|auto))?$/
              .exec(word))) {
        cue.position = Number(results[1]);
        if (results[2]) {
          VttTextParser.setPositionAlign_(cue, results[2]);
        }
      } else if ((results = /^region:(.*)$/.exec(word))) {
        const region = VttTextParser.getRegionById_(regions, results[1]);
        if (region) {
          cue.region = region;
        }
      } else {
        return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
      }

      return true;
    }

    /**
     * Looks up a region by id.  Logs a warning and returns null when no
     * region has the given id.
     *
     * @param {!Array.<!shaka.text.CueRegion>} regions
     * @param {string} id
     * @return {?shaka.text.CueRegion}
     * @private
     */
    static getRegionById_(regions, id) {
      const regionsWithId = regions.filter((region) => {
        return region.id == id;
      });
      if (!regionsWithId.length) {
        shaka.log.warning('VTT parser could not find a region with id: ',
            id,
            ' The region will be ignored.');
        return null;
      }
      goog.asserts.assert(regionsWithId.length == 1,
          'VTTRegion ids should be unique!');

      return regionsWithId[0];
    }

    /**
     * Parses a WebVTTRegion setting from the given word and applies it to
     * the region.
     *
     * @param {!shaka.text.CueRegion} region
     * @param {string} word
     * @return {boolean} True on success.
     * @private
     */
    static parseRegionSetting_(region, word) {
      let results = null;
      if ((results = /^id=(.*)$/.exec(word))) {
        region.id = results[1];
      } else if ((results = /^width=(\d{1,2}|100)%$/.exec(word))) {
        region.width = Number(results[1]);
      } else if ((results = /^lines=(\d+)$/.exec(word))) {
        region.height = Number(results[1]);
        region.heightUnits = shaka.text.CueRegion.units.LINES;
      } else if ((results = /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
          .exec(word))) {
        region.regionAnchorX = Number(results[1]);
        region.regionAnchorY = Number(results[2]);
      } else if ((results = /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
          .exec(word))) {
        region.viewportAnchorX = Number(results[1]);
        region.viewportAnchorY = Number(results[2]);
      } else if ((results = /^scroll=up$/.exec(word))) {
        region.scroll = shaka.text.CueRegion.scrollMode.UP;
      } else {
        return false;
      }

      return true;
    }

    /**
     * Sets the cue's text alignment.  'middle' is treated as CENTER; every
     * other value is looked up by its upper-case name in Cue.textAlign.
     *
     * @param {!shaka.text.Cue} cue
     * @param {string} align
     * @private
     */
    static setTextAlign_(cue, align) {
      const Cue = shaka.text.Cue;
      if (align == 'middle') {
        cue.textAlign = Cue.textAlign.CENTER;
      } else {
        goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
            align.toUpperCase() +
                              ' Should be in Cue.textAlign values!');

        cue.textAlign = Cue.textAlign[align.toUpperCase()];
      }
    }

    /**
     * Sets the cue's position alignment.  Values other than the recognized
     * left/right/center spellings result in AUTO.
     *
     * @param {!shaka.text.Cue} cue
     * @param {string} align
     * @private
     */
    static setPositionAlign_(cue, align) {
      const Cue = shaka.text.Cue;
      if (align == 'line-left' || align == 'start') {
        cue.positionAlign = Cue.positionAlign.LEFT;
      } else if (align == 'line-right' || align == 'end') {
        cue.positionAlign = Cue.positionAlign.RIGHT;
      } else if (align == 'center' || align == 'middle') {
        cue.positionAlign = Cue.positionAlign.CENTER;
      } else {
        cue.positionAlign = Cue.positionAlign.AUTO;
      }
    }

    /**
     * Sets the cue's vertical writing mode: 'lr' selects left-to-right, any
     * other value selects right-to-left.
     *
     * @param {!shaka.text.Cue} cue
     * @param {string} value
     * @private
     */
    static setVerticalWritingMode_(cue, value) {
      const Cue = shaka.text.Cue;
      if (value == 'lr') {
        cue.writingMode = Cue.writingMode.VERTICAL_LEFT_TO_RIGHT;
      } else {
        cue.writingMode = Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
      }
    }

    /**
     * Parses a "line:" cue setting, given either as a percentage or as a
     * line number, with an optional alignment suffix.
     *
     * @param {!shaka.text.Cue} cue
     * @param {string} word
     * @return {boolean} True if the word was a valid "line:" setting.
     * @private
     */
    static parsedLineValueAndInterpretation_(cue, word) {
      const Cue = shaka.text.Cue;

      // Try the percentage form first, then the line-number form.
      let interpretation = Cue.lineInterpretation.PERCENTAGE;
      let results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word);
      if (!results) {
        interpretation = Cue.lineInterpretation.LINE_NUMBER;
        results = /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word);
      }
      if (!results) {
        return false;
      }

      cue.lineInterpretation = interpretation;
      cue.line = Number(results[1]);
      if (results[2]) {
        const alignKey = results[2].toUpperCase();
        goog.asserts.assert(
            alignKey in Cue.lineAlign,
            alignKey + ' Should be in Cue.lineAlign values!');
        cue.lineAlign = Cue.lineAlign[alignKey];
      }

      return true;
    }
  };
  /**
   * Number of MPEG-2 clock ticks per second; used to convert the MPEGTS value
   * of an X-TIMESTAMP-MAP header into seconds.
   * @const {number}
   * @private
   */
  shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;

  /**
   * At this value, timestamps roll over in TS content (2^33 ticks).
   * @const {number}
   * @private
   */
  shaka.text.VttTextParser.TS_ROLLOVER_ = Math.pow(2, 33);

  // Register the parser for every MIME type under which WebVTT is delivered.
  for (const mimeType of [
    'text/vtt',
    'text/vtt; codecs="vtt"',
    'text/vtt; codecs="wvtt"',
  ]) {
    shaka.text.TextEngine.registerParser(
        mimeType, () => new shaka.text.VttTextParser());
  }