Unverified Commit 6701b0fe authored by Tobias Groza's avatar Tobias Groza Committed by GitHub
Browse files

Merge pull request #447 from Stypox/better-deobfuscation

[YouTube] Cache deobfuscation code and more fixes
parents b42a196f 89a77ae7
......@@ -94,7 +94,7 @@ public class ItagItem {
return item;
}
}
throw new ParsingException("itag=" + Integer.toString(itagId) + " not supported");
throw new ParsingException("itag=" + itagId + " not supported");
}
/*//////////////////////////////////////////////////////////////////////////
......
......@@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
......@@ -42,10 +44,10 @@ import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
......@@ -86,7 +88,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Exceptions
//////////////////////////////////////////////////////////////////////////*/
public class DeobfuscateException extends ParsingException {
public static class DeobfuscateException extends ParsingException {
DeobfuscateException(String message, Throwable cause) {
super(message, cause);
}
......@@ -94,20 +96,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/*//////////////////////////////////////////////////////////////////////////*/
@Nullable private static String cachedDeobfuscationCode = null;
@Nullable private String playerJsUrl = null;
private JsonArray initialAjaxJson;
@Nullable
private JsonObject playerArgs;
@Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonObject playerResponse;
private JsonObject initialData;
@Nonnull private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonObject playerResponse;
private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer;
private int ageLimit;
private boolean newJsonScheme;
@Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
private int ageLimit = -1;
@Nullable private List<SubtitlesStream> subtitles = null;
public YoutubeStreamExtractor(StreamingService service, LinkHandler linkHandler) {
super(service, linkHandler);
......@@ -138,18 +137,27 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return title;
}
@Nullable
@Override
public String getTextualUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
JsonObject micro = playerResponse.getObject("microformat").getObject("playerMicroformatRenderer");
if (micro.isString("uploadDate") && !micro.getString("uploadDate").isEmpty()) {
final JsonObject micro =
playerResponse.getObject("microformat").getObject("playerMicroformatRenderer");
if (!micro.getString("uploadDate", EMPTY_STRING).isEmpty()) {
return micro.getString("uploadDate");
}
if (micro.isString("publishDate") && !micro.getString("publishDate").isEmpty()) {
} else if (!micro.getString("publishDate", EMPTY_STRING).isEmpty()) {
return micro.getString("publishDate");
} else {
final JsonObject liveDetails = micro.getObject("liveBroadcastDetails");
if (!liveDetails.getString("endTimestamp", EMPTY_STRING).isEmpty()) {
// an ended live stream
return liveDetails.getString("endTimestamp");
} else if (!liveDetails.getString("startTimestamp", EMPTY_STRING).isEmpty()) {
// a running live stream
return liveDetails.getString("startTimestamp");
} else if (getStreamType() == StreamType.LIVE_STREAM) {
// this should never be reached, but a live stream without upload date is valid
return null;
}
}
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")).startsWith("Premiered")) {
......@@ -163,8 +171,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
try { // Premiered Feb 21, 2020
LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, YYYY", Locale.ENGLISH));
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (Exception ignored) {
}
......@@ -177,6 +185,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (Exception ignored) {
}
throw new ParsingException("Could not get upload date");
}
......@@ -224,9 +233,28 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Override
public int getAgeLimit() {
if (isNullOrEmpty(initialData)) throw new IllegalStateException("initialData is not parsed yet");
public int getAgeLimit() throws ParsingException {
if (ageLimit == -1) {
ageLimit = NO_AGE_LIMIT;
final JsonArray metadataRows = getVideoSecondaryInfoRenderer()
.getObject("metadataRowContainer").getObject("metadataRowContainerRenderer")
.getArray("rows");
for (final Object metadataRow : metadataRows) {
final JsonArray contents = ((JsonObject) metadataRow)
.getObject("metadataRowRenderer").getArray("contents");
for (final Object content : contents) {
final JsonArray runs = ((JsonObject) content).getArray("runs");
for (final Object run : runs) {
final String rowText = ((JsonObject) run).getString("text", EMPTY_STRING);
if (rowText.contains("Age-restricted")) {
ageLimit = 18;
return ageLimit;
}
}
}
}
}
return ageLimit;
}
......@@ -313,8 +341,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} catch (NumberFormatException nfe) {
throw new ParsingException("Could not parse \"" + likesString + "\" as an Integer", nfe);
} catch (Exception e) {
if (ageLimit == 18) return -1;
throw new ParsingException("Could not get like count", e);
if (getAgeLimit() == NO_AGE_LIMIT) {
throw new ParsingException("Could not get like count", e);
}
return -1;
}
}
......@@ -337,8 +367,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} catch (NumberFormatException nfe) {
throw new ParsingException("Could not parse \"" + dislikesString + "\" as an Integer", nfe);
} catch (Exception e) {
if (ageLimit == 18) return -1;
throw new ParsingException("Could not get dislike count", e);
if (getAgeLimit() == NO_AGE_LIMIT) {
throw new ParsingException("Could not get dislike count", e);
}
return -1;
}
}
......@@ -402,8 +434,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
if (isNullOrEmpty(url)) {
if (ageLimit == 18) return "";
throw new ParsingException("Could not get uploader avatar URL");
if (ageLimit == NO_AGE_LIMIT) {
throw new ParsingException("Could not get uploader avatar URL");
}
return "";
}
return fixThumbnailUrl(url);
......@@ -411,19 +445,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull
@Override
public String getSubChannelUrl() throws ParsingException {
public String getSubChannelUrl() {
return "";
}
@Nonnull
@Override
public String getSubChannelName() throws ParsingException {
public String getSubChannelName() {
return "";
}
@Nonnull
@Override
public String getSubChannelAvatarUrl() throws ParsingException {
public String getSubChannelAvatarUrl() {
return "";
}
......@@ -437,8 +471,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return playerResponse.getObject("streamingData").getString("dashManifestUrl");
} else if (videoInfoPage.containsKey("dashmpd")) {
dashManifestUrl = videoInfoPage.get("dashmpd");
} else if (playerArgs != null && playerArgs.isString("dashmpd")) {
dashManifestUrl = playerArgs.getString("dashmpd", EMPTY_STRING);
} else {
return "";
}
......@@ -447,7 +479,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String obfuscatedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifestUrl);
String deobfuscatedSig;
deobfuscatedSig = deobfuscateSignature(obfuscatedSig, deobfuscationCode);
deobfuscatedSig = deobfuscateSignature(obfuscatedSig);
dashManifestUrl = dashManifestUrl.replace("/s/" + obfuscatedSig, "/signature/" + deobfuscatedSig);
}
......@@ -465,11 +497,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try {
return playerResponse.getObject("streamingData").getString("hlsManifestUrl");
} catch (Exception e) {
if (playerArgs != null && playerArgs.isString("hlsvp")) {
return playerArgs.getString("hlsvp");
} else {
throw new ParsingException("Could not get hls manifest url", e);
}
throw new ParsingException("Could not get hls manifest url", e);
}
}
......@@ -535,35 +563,57 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override
@Nonnull
public List<SubtitlesStream> getSubtitlesDefault() {
public List<SubtitlesStream> getSubtitlesDefault() throws ParsingException {
return getSubtitles(MediaFormat.TTML);
}
@Override
@Nonnull
public List<SubtitlesStream> getSubtitles(final MediaFormat format) {
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws ParsingException {
assertPageFetched();
List<SubtitlesStream> subtitles = new ArrayList<>();
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
subtitles.add(subtitlesInfo.getSubtitle(format));
// If the video is age restricted getPlayerConfig will fail
if (getAgeLimit() != NO_AGE_LIMIT) {
return Collections.emptyList();
}
if (subtitles != null) {
// already calculated
return subtitles;
}
final JsonObject renderer = playerResponse.getObject("captions")
.getObject("playerCaptionsTracklistRenderer");
final JsonArray captionsArray = renderer.getArray("captionTracks");
// TODO: use this to apply auto translation to different language from a source language
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
subtitles = new ArrayList<>();
for (int i = 0; i < captionsArray.size(); i++) {
final String languageCode = captionsArray.getObject(i).getString("languageCode");
final String baseUrl = captionsArray.getObject(i).getString("baseUrl");
final String vssId = captionsArray.getObject(i).getString("vssId");
if (languageCode != null && baseUrl != null && vssId != null) {
final boolean isAutoGenerated = vssId.startsWith("a.");
final String cleanUrl = baseUrl
.replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists
.replaceAll("&tlang=[^&]*", ""); // Remove translation language
subtitles.add(new SubtitlesStream(format, languageCode,
cleanUrl + "&fmt=" + format.getSuffix(), isAutoGenerated));
}
}
return subtitles;
}
@Override
public StreamType getStreamType() throws ParsingException {
public StreamType getStreamType() {
assertPageFetched();
try {
if (!playerResponse.getObject("streamingData").has(FORMATS) ||
(playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) {
return StreamType.LIVE_STREAM;
}
} catch (Exception e) {
throw new ParsingException("Could not get stream type", e);
}
return StreamType.VIDEO_STREAM;
return playerResponse.getObject("streamingData").has(FORMATS)
? StreamType.VIDEO_STREAM : StreamType.LIVE_STREAM;
}
@Nullable
private StreamInfoItemExtractor getNextStream() throws ExtractionException {
try {
final JsonObject firstWatchNextItem = initialData.getObject("contents")
......@@ -648,48 +698,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
"\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\("
};
private volatile String deobfuscationCode = "";
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
final String url = getUrl() + "&pbj=1";
final String playerUrl;
initialAjaxJson = getJsonResponse(url, getExtractorLocalization());
if (initialAjaxJson.getObject(2).has("response")) { // age-restricted videos
initialData = initialAjaxJson.getObject(2).getObject("response");
ageLimit = 18;
final EmbeddedInfo info = getEmbeddedInfo();
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
playerUrl = info.url;
} else {
ageLimit = NO_AGE_LIMIT;
JsonObject playerConfig;
initialData = initialAjaxJson.getObject(3).getObject("response");
// sometimes at random YouTube does not provide the player config
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
if (playerConfig == null) {
newJsonScheme = true;
final EmbeddedInfo info = getEmbeddedInfo();
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
playerUrl = info.url;
} else {
playerArgs = getPlayerArgs(playerConfig);
playerUrl = getPlayerUrl(playerConfig);
public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException {
initialAjaxJson = getJsonResponse(getUrl() + "&pbj=1", getExtractorLocalization());
initialData = initialAjaxJson.getObject(3).getObject("response", null);
if (initialData == null) {
initialData = initialAjaxJson.getObject(2).getObject("response", null);
if (initialData == null) {
throw new ParsingException("Could not get initial data");
}
}
playerResponse = getPlayerResponse();
playerResponse = initialAjaxJson.getObject(2).getObject("playerResponse", null);
if (playerResponse == null || !playerResponse.has("streamingData")) {
// try to get player response by fetching video info page
fetchVideoInfoPage();
}
final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus");
final String status = playabilityStatus.getString("status");
......@@ -698,117 +724,77 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final String reason = playabilityStatus.getString("reason");
throw new ContentNotAvailableException("Got error: \"" + reason + "\"");
}
if (deobfuscationCode.isEmpty()) {
deobfuscationCode = loadDeobfuscationCode(playerUrl);
}
if (subtitlesInfos.isEmpty()) {
subtitlesInfos.addAll(getAvailableSubtitlesInfo());
}
}
private JsonObject getPlayerArgs(final JsonObject playerConfig) throws ParsingException {
//attempt to load the youtube js player JSON arguments
final JsonObject playerArgs = playerConfig.getObject("args", null);
if (playerArgs == null) {
throw new ParsingException("Could not extract args from YouTube player config");
}
return playerArgs;
}
private void fetchVideoInfoPage() throws ParsingException, ReCaptchaException, IOException {
final String sts = getEmbeddedInfoStsAndStorePlayerJsUrl();
final String videoInfoUrl = getVideoInfoUrl(getId(), sts);
final String infoPageResponse = NewPipe.getDownloader()
.get(videoInfoUrl, getExtractorLocalization()).responseBody();
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
private String getPlayerUrl(final JsonObject playerConfig) throws ParsingException {
// The Youtube service needs to be initialized by downloading the
// js-Youtube-player. This is done in order to get the algorithm
// for deobfuscating cryptic signatures inside certain stream URLs.
final String playerUrl = playerConfig.getObject("assets").getString("js");
if (playerUrl == null) {
throw new ParsingException("Could not extract js URL from YouTube player config");
} else if (playerUrl.startsWith("//")) {
return HTTPS + playerUrl;
}
return playerUrl;
}
private JsonObject getPlayerResponse() throws ParsingException {
try {
String playerResponseStr;
if (newJsonScheme) {
return initialAjaxJson.getObject(2).getObject("playerResponse");
}
if (playerArgs != null) {
playerResponseStr = playerArgs.getString("player_response");
} else {
playerResponseStr = videoInfoPage.get("player_response");
}
return JsonParser.object().from(playerResponseStr);
} catch (Exception e) {
throw new ParsingException("Could not parse yt player response", e);
playerResponse = JsonParser.object().from(videoInfoPage.get("player_response"));
} catch (JsonParserException e) {
throw new ParsingException(
"Could not parse YouTube player response from video info page", e);
}
}
@Nonnull
private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException {
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
try {
final Downloader downloader = NewPipe.getDownloader();
final String embedUrl = "https://www.youtube.com/embed/" + getId();
final String embedPageContent = downloader.get(embedUrl, getExtractorLocalization()).responseBody();
final String embedPageContent = NewPipe.getDownloader()
.get(embedUrl, getExtractorLocalization()).responseBody();
// Get player url
String playerUrl = null;
try {
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
} catch (Parser.RegexException ex) {
// playerUrl is still available in the file, just somewhere else
// playerJsUrl is still available in the file, just somewhere else TODO
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
final Document doc = Jsoup.parse(embedPageContent);
final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (Element elem : elems) {
if (elem.attr("src").contains("base.js")) {
playerUrl = elem.attr("src");
playerJsUrl = elem.attr("src");
break;
}
}
if (playerUrl == null) {
throw new ParsingException("Could not get playerUrl");
}
}
if (playerUrl.startsWith("//")) {
playerUrl = HTTPS + playerUrl;
} else if (playerUrl.startsWith("/")) {
playerUrl = HTTPS + "//youtube.com" + playerUrl;
}
// Get embed sts
return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
} catch (Exception i) {
// if it fails we simply reply with no sts as then it does not seem to be necessary
return "";
}
}
private String getDeobfuscationFuncName(final String playerCode) throws DeobfuscateException {
Parser.RegexException exception = null;
for (final String regex : REGEXES) {
try {
// Get embed sts
final String stsPattern = "\"sts\"\\s*:\\s*(\\d+)";
final String sts = Parser.matchGroup1(stsPattern, embedPageContent);
return new EmbeddedInfo(playerUrl, sts);
} catch (Exception i) {
// if it fails we simply reply with no sts as then it does not seem to be necessary
return new EmbeddedInfo(playerUrl, "");
return Parser.matchGroup1(regex, playerCode);
} catch (Parser.RegexException re) {
if (exception == null) {
exception = re;
}
}
} catch (IOException e) {
throw new ParsingException(
"Could not load deobfuscation code from YouTube video embed", e);
}
throw new DeobfuscateException("Could not find deobfuscate function with any of the given patterns.", exception);
}
private String loadDeobfuscationCode(String playerUrl) throws DeobfuscateException {
private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
throws DeobfuscateException {
try {
Downloader downloader = NewPipe.getDownloader();
if (!playerUrl.contains("https://youtube.com")) {
//sometimes the https://youtube.com part does not get send with
//than we have to add it by hand
playerUrl = "https://youtube.com" + playerUrl;
}
final String playerCode = downloader.get(playerUrl, getExtractorLocalization()).responseBody();
final String playerCode = NewPipe.getDownloader()
.get(playerJsUrl, getExtractorLocalization()).responseBody();
final String deobfuscationFunctionName = getDeobfuscationFuncName(playerCode);
final String functionPattern = "("
......@@ -834,7 +820,34 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
private String deobfuscateSignature(String obfuscatedSig, String deobfuscationCode) throws DeobfuscateException {
@Nonnull
private String getDeobfuscationCode() throws ParsingException {
if (cachedDeobfuscationCode == null) {
if (playerJsUrl == null) {
// the currentPlayerJsUrl was not found in any page fetched so far and there is
// nothing cached, so try fetching embedded info
getEmbeddedInfoStsAndStorePlayerJsUrl();
if (playerJsUrl == null) {
throw new ParsingException(
"Embedded info did not provide YouTube player js url");
}
}