我试图在视频的每一帧上绘制一个位图作为叠加层。我找到了一个关于如何对视频进行解码和编码的示例,并且它正在运行。这个例子有一个带有 drawFrame 函数的 TextureRenderer 类,我需要修改它以添加位图。我是 opengl 的新手,但我了解到我需要使用位图创建纹理并绑定(bind)它。我在下面的代码中尝试过,但它抛出异常。
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// from: https://android.googlesource.com/platform/cts/+/lollipop-release/tests/tests/media/src/android/media/cts/TextureRender.java
// blob: 4125dcfcfed6ed7fddba5b71d657dec0d433da6a
// modified: removed unused method bodies
// modified: use GL_LINEAR for GL_TEXTURE_MIN_FILTER to improve quality.
package com.example.name.videoeditortest;
/**
* Code for rendering a texture onto a surface using OpenGL ES 2.0.
*/
import android.graphics.Bitmap;
import android.graphics.SurfaceTexture;
import android.opengl.GLES11Ext;
import android.opengl.GLES20;
import android.opengl.GLUtils;
import android.opengl.Matrix;
import android.util.Log;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
/**
* Code for rendering a texture onto a surface using OpenGL ES 2.0.
*/
class TextureRender {
private Bitmap bitmap;
private static final String TAG = "TextureRender";
private static final int FLOAT_SIZE_BYTES = 4;
private static final int TRIANGLE_VERTICES_DATA_STRIDE_BYTES = 5 * FLOAT_SIZE_BYTES;
private static final int TRIANGLE_VERTICES_DATA_POS_OFFSET = 0;
private static final int TRIANGLE_VERTICES_DATA_UV_OFFSET = 3;
private final float[] mTriangleVerticesData = {
// X, Y, Z, U, V
-1.0f, -1.0f, 0, 0.f, 0.f,
1.0f, -1.0f, 0, 1.f, 0.f,
-1.0f, 1.0f, 0, 0.f, 1.f,
1.0f, 1.0f, 0, 1.f, 1.f,
};
private FloatBuffer mTriangleVertices;
private static final String VERTEX_SHADER =
"uniform mat4 uMVPMatrix;\n" +
"uniform mat4 uSTMatrix;\n" +
"attribute vec4 aPosition;\n" +
"attribute vec4 aTextureCoord;\n" +
"varying vec2 vTextureCoord;\n" +
"void main() {\n" +
" gl_Position = uMVPMatrix * aPosition;\n" +
" vTextureCoord = (uSTMatrix * aTextureCoord).xy;\n" +
"}\n";
private static final String FRAGMENT_SHADER =
"#extension GL_OES_EGL_image_external : require\n" +
"precision mediump float;\n" + // highp here doesn't seem to matter
"varying vec2 vTextureCoord;\n" +
"uniform samplerExternalOES sTexture;\n" +
"void main() {\n" +
" gl_FragColor = texture2D(sTexture, vTextureCoord);\n" +
"}\n";
private float[] mMVPMatrix = new float[16];
private float[] mSTMatrix = new float[16];
private int mProgram;
private int mTextureID = -12345;
private int mTextureBitmapID = -12345;
private int muMVPMatrixHandle;
private int muSTMatrixHandle;
private int maPositionHandle;
private int maTextureHandle;
public TextureRender() {
mTriangleVertices = ByteBuffer.allocateDirect(
mTriangleVerticesData.length * FLOAT_SIZE_BYTES)
.order(ByteOrder.nativeOrder()).asFloatBuffer();
mTriangleVertices.put(mTriangleVerticesData).position(0);
Matrix.setIdentityM(mSTMatrix, 0);
}
public int getTextureId() {
return mTextureID;
}
public void drawFrame(SurfaceTexture st) {
checkGlError("onDrawFrame start");
st.getTransformMatrix(mSTMatrix);
GLES20.glClearColor(0.0f, 1.0f, 0.0f, 1.0f);
GLES20.glClear(GLES20.GL_DEPTH_BUFFER_BIT | GLES20.GL_COLOR_BUFFER_BIT);
GLES20.glUseProgram(mProgram);
checkGlError("glUseProgram");
//Bing textrues
GLES20.glActiveTexture(GLES20.GL_TEXTURE0);
GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, mTextureID);
GLES20.glActiveTexture(GLES20.GL_TEXTURE_2D);
GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, mTextureBitmapID);
mTriangleVertices.position(TRIANGLE_VERTICES_DATA_POS_OFFSET);
GLES20.glVertexAttribPointer(maPositionHandle, 3, GLES20.GL_FLOAT, false,
TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
checkGlError("glVertexAttribPointer maPosition");
GLES20.glEnableVertexAttribArray(maPositionHandle);
checkGlError("glEnableVertexAttribArray maPositionHandle");
mTriangleVertices.position(TRIANGLE_VERTICES_DATA_UV_OFFSET);
GLES20.glVertexAttribPointer(maTextureHandle, 2, GLES20.GL_FLOAT, false,
TRIANGLE_VERTICES_DATA_STRIDE_BYTES, mTriangleVertices);
checkGlError("glVertexAttribPointer maTextureHandle");
GLES20.glEnableVertexAttribArray(maTextureHandle);
checkGlError("glEnableVertexAttribArray maTextureHandle");
Matrix.setIdentityM(mMVPMatrix, 0);
GLES20.glUniformMatrix4fv(muMVPMatrixHandle, 1, false, mMVPMatrix, 0);
GLES20.glUniformMatrix4fv(muSTMatrixHandle, 1, false, mSTMatrix, 0);
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4);
checkGlError("glDrawArrays");
GLES20.glFinish();
}
/**
* Initializes GL state. Call this after the EGL surface has been created and made current.
*/
public void surfaceCreated() {
mProgram = createProgram(VERTEX_SHADER, FRAGMENT_SHADER);
if (mProgram == 0) {
throw new RuntimeException("failed creating program");
}
maPositionHandle = GLES20.glGetAttribLocation(mProgram, "aPosition");
checkGlError("glGetAttribLocation aPosition");
if (maPositionHandle == -1) {
throw new RuntimeException("Could not get attrib location for aPosition");
}
maTextureHandle = GLES20.glGetAttribLocation(mProgram, "aTextureCoord");
checkGlError("glGetAttribLocation aTextureCoord");
if (maTextureHandle == -1) {
throw new RuntimeException("Could not get attrib location for aTextureCoord");
}
muMVPMatrixHandle = GLES20.glGetUniformLocation(mProgram, "uMVPMatrix");
checkGlError("glGetUniformLocation uMVPMatrix");
if (muMVPMatrixHandle == -1) {
throw new RuntimeException("Could not get attrib location for uMVPMatrix");
}
muSTMatrixHandle = GLES20.glGetUniformLocation(mProgram, "uSTMatrix");
checkGlError("glGetUniformLocation uSTMatrix");
if (muSTMatrixHandle == -1) {
throw new RuntimeException("Could not get attrib location for uSTMatrix");
}
int[] textures = new int[1];
GLES20.glGenTextures(1, textures, 0);
mTextureID = textures[0];
GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, mTextureID);
checkGlError("glBindTexture mTextureID");
GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_MIN_FILTER,
GLES20.GL_NEAREST);
GLES20.glTexParameterf(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_MAG_FILTER,
GLES20.GL_LINEAR);
GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_WRAP_S,
GLES20.GL_CLAMP_TO_EDGE);
GLES20.glTexParameteri(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, GLES20.GL_TEXTURE_WRAP_T,
GLES20.GL_CLAMP_TO_EDGE);
checkGlError("glTexParameter");
mTextureBitmapID = loadBitmapTexture();
}
private int loadBitmapTexture()
{
final int[] textureHandle = new int[1];
GLES20.glGenTextures(1, textureHandle, 0);
if (textureHandle[0] != 0)
{
// Bind to the texture in OpenGL
GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, textureHandle[0]);
// Set filtering
GLES20.glTexParameteri(GLES20.GL_TEXTURE_2D, GLES20.GL_TEXTURE_MIN_FILTER, GLES20.GL_NEAREST);
GLES20.glTexParameteri(GLES20.GL_TEXTURE_2D, GLES20.GL_TEXTURE_MAG_FILTER, GLES20.GL_NEAREST);
// Load the bitmap into the bound texture.
GLUtils.texImage2D(GLES20.GL_TEXTURE_2D, 0, bitmap, 0);
}
if (textureHandle[0] == 0)
{
throw new RuntimeException("Error loading texture.");
}
return textureHandle[0];
}
/**
* Replaces the fragment shader.
*/
public void changeFragmentShader(String fragmentShader) {
GLES20.glDeleteProgram(mProgram);
mProgram = createProgram(VERTEX_SHADER, fragmentShader);
if (mProgram == 0) {
throw new RuntimeException("failed creating program");
}
}
private int loadShader(int shaderType, String source) {
int shader = GLES20.glCreateShader(shaderType);
checkGlError("glCreateShader type=" + shaderType);
GLES20.glShaderSource(shader, source);
GLES20.glCompileShader(shader);
int[] compiled = new int[1];
GLES20.glGetShaderiv(shader, GLES20.GL_COMPILE_STATUS, compiled, 0);
if (compiled[0] == 0) {
Log.e(TAG, "Could not compile shader " + shaderType + ":");
Log.e(TAG, " " + GLES20.glGetShaderInfoLog(shader));
GLES20.glDeleteShader(shader);
shader = 0;
}
return shader;
}
private int createProgram(String vertexSource, String fragmentSource) {
int vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, vertexSource);
if (vertexShader == 0) {
return 0;
}
int pixelShader = loadShader(GLES20.GL_FRAGMENT_SHADER, fragmentSource);
if (pixelShader == 0) {
return 0;
}
int program = GLES20.glCreateProgram();
checkGlError("glCreateProgram");
if (program == 0) {
Log.e(TAG, "Could not create program");
}
GLES20.glAttachShader(program, vertexShader);
checkGlError("glAttachShader");
GLES20.glAttachShader(program, pixelShader);
checkGlError("glAttachShader");
GLES20.glLinkProgram(program);
int[] linkStatus = new int[1];
GLES20.glGetProgramiv(program, GLES20.GL_LINK_STATUS, linkStatus, 0);
if (linkStatus[0] != GLES20.GL_TRUE) {
Log.e(TAG, "Could not link program: ");
Log.e(TAG, GLES20.glGetProgramInfoLog(program));
GLES20.glDeleteProgram(program);
program = 0;
}
return program;
}
public void checkGlError(String op) {
int error;
while ((error = GLES20.glGetError()) != GLES20.GL_NO_ERROR) {
Log.e(TAG, op + ": glError " + error);
throw new RuntimeException(op + ": glError " + error);
}
}
public void setBitmap(Bitmap bitmap){
this.bitmap = bitmap;
}
/**
* Saves the current frame to disk as a PNG image. Frame starts from (0,0).
* <p>
* Useful for debugging.
*/
public static void saveFrame(String filename, int width, int height) {
// glReadPixels gives us a ByteBuffer filled with what is essentially big-endian RGBA
// data (i.e. a byte of red, followed by a byte of green...). We need an int[] filled
// with native-order ARGB data to feed to Bitmap.
//
// If we implement this as a series of buf.get() calls, we can spend 2.5 seconds just
// copying data around for a 720p frame. It's better to do a bulk get() and then
// rearrange the data in memory. (For comparison, the PNG compress takes about 500ms
// for a trivial frame.)
//
// So... we set the ByteBuffer to little-endian, which should turn the bulk IntBuffer
// get() into a straight memcpy on most Android devices. Our ints will hold ABGR data.
// Swapping B and R gives us ARGB. We need about 30ms for the bulk get(), and another
// 270ms for the color swap.
//
// Making this even more interesting is the upside-down nature of GL, which means we
// may want to flip the image vertically here.
ByteBuffer buf = ByteBuffer.allocateDirect(width * height * 4);
buf.order(ByteOrder.LITTLE_ENDIAN);
GLES20.glReadPixels(0, 0, width, height, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, buf);
buf.rewind();
int pixelCount = width * height;
int[] colors = new int[pixelCount];
buf.asIntBuffer().get(colors);
for (int i = 0; i < pixelCount; i++) {
int c = colors[i];
colors[i] = (c & 0xff00ff00) | ((c & 0x00ff0000) >> 16) | ((c & 0x000000ff) << 16);
}
FileOutputStream fos = null;
try {
fos = new FileOutputStream(filename);
Bitmap bmp = Bitmap.createBitmap(colors, width, height, Bitmap.Config.ARGB_8888);
bmp.compress(Bitmap.CompressFormat.PNG, 90, fos);
bmp.recycle();
} catch (IOException ioe) {
throw new RuntimeException("Failed to write file " + filename, ioe);
} finally {
try {
if (fos != null) fos.close();
} catch (IOException ioe2) {
throw new RuntimeException("Failed to close file " + filename, ioe2);
}
}
Log.d(TAG, "Saved " + width + "x" + height + " frame as '" + filename + "'");
}
}
异常抛出:
E/ExtractDecodeEditEncodeMuxTest: error while releasing muxer
java.lang.IllegalStateException: Can't stop due to wrong state.
at android.media.MediaMuxer.stop(MediaMuxer.java:231)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest.extractDecodeEditEncodeMux(ExtractDecodeEditEncodeMuxTest.java 434)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest.access$000(ExtractDecodeEditEncodeMuxTest.java:58)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest$TestWrapper.run(ExtractDecodeEditEncodeMuxTest.java:171)
at java.lang.Thread.run(Thread.java:841)
如果我在 drawFrame 中评论 GLES20.glActiveTexture(GLES20.GL_TEXTURE_2D);,视频会正确呈现但没有位图。如果我在 drawFrame 中评论 GLES20.glBindTexture(GLES20.GL_TEXTURE_2D, mTextureBitmapID); 我会得到以下异常:
java.lang.RuntimeException: glVertexAttribPointer maPosition: glError 1280
at com.example.name.videoeditortest.TextureRender.checkGlError(TextureRender.java:259)
at com.example.name.videoeditortest.TextureRender.drawFrame(TextureRender.java:111)
at com.example.name.videoeditortest.OutputSurface.drawImage(OutputSurface.java:252)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest.doExtractDecodeEditEncodeMux(ExtractDecodeEditEncodeMuxTest.java:793)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest.extractDecodeEditEncodeMux(ExtractDecodeEditEncodeMuxTest.java:341)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest.access$000(ExtractDecodeEditEncodeMuxTest.java:58)
at com.example.name.videoeditortest.ExtractDecodeEditEncodeMuxTest$TestWrapper.run(ExtractDecodeEditEncodeMuxTest.java:171)
at java.lang.Thread.run(Thread.java:841)
最佳答案
我看到两件事对我来说似乎是错误的。
您正在尝试同时绑定(bind)所有内容,并希望一次调用 GLES20.glDrawArrays() 即可绘制所有内容。
您只有一个着色器,而您应该有两个:一个用于进行视频纹理渲染,另一个用于位图层渲染。
你必须知道的是,可以通过多次调用 glDrawArrays 来绘制一个框架,每次调用只会在之前绘制的东西上绘制一小部分(基本上)。
在您的情况下渲染框架的第一部分应该看这个:
# init
loadShaderForVideo()
loadShaderForBitmapLayer()
prepareYourArraysEtc()
...
#loop
GLClear()
updateVideoTexture()
drawFrame(){
drawVideo()
drawBitmap()
}
drawVideo(){
bindYourActiveTextureToVideo()
setYourVertexAttribAndUniform()
GLES20.glDrawArrays()
}
drawBitmap() {
bindYourActiveTextureToBitmap()
setYourVertexAttribAndUniform() // This should be the same as above for video
// Considering you want to draw above your video, consider activating the blending for transparency :
GLES20.glEnable(GLES20.GL_BLEND);
GLES20.glBlendFunc(GLES20.GL_SRC_ALPHA, GLES20.GL_ONE_MINUS_SRC_ALPHA);
GLES20.glDrawArrays()
}
关于着色器,看看这些:
一个通用的 Vertex Shader :
public static final String vertexDefaultShaderCode =
"uniform mat4 uVPMatrix;" +
"uniform mat4 uModelMatrix;" + // uniform = input const
"attribute vec3 aPosition;" + // attribute = input property different for each vertex
"attribute vec2 aTexCoordinate;" +
"varying vec2 vTexCoordinate;" +// varying = output property different for each pixel
"void main() {" +
"vTexCoordinate = aTexCoordinate;" +
"gl_Position = uVPMatrix * uModelMatrix * vec4(aPosition,1.0);" +
"}";
然后是基本的 fragment 着色器(用于位图 2D 纹理):
public static final String fragmentDefaultShaderCode =
"precision mediump float;" +
"uniform sampler2D uTexture;" +
"varying vec2 vTexCoordinate;" +
"void main() {" +
" gl_FragColor = texture2D(uTexture, vTexCoordinate);" +
"}";
然后是一个不同的视频渲染版本:
public static final String fragmentExternalShaderCode =
"#extension GL_OES_EGL_image_external : require\n" +
"precision mediump float;" +
"uniform samplerExternalOES sTexture;" +
"varying vec2 vTexCoordinate;" +
"void main() {" +
" gl_FragColor = texture2D(sTexture, vTexCoordinate);" +
"}";
因此您将需要两个程序,一个带有 defaultVertexShader + defaultFragmentShader,另一个带有 defaultVertexShader + fragmentExternalShaderCode。
关于java - Opengl es 2.0 在视频上绘制位图叠加,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39589247/
我真的很习惯使用Ruby编写以下代码:my_hash={}my_hash['test']=1Java中对应的数据结构是什么? 最佳答案 HashMapmap=newHashMap();map.put("test",1);我假设? 关于java-等价于Java中的RubyHash,我们在StackOverflow上找到一个类似的问题: https://stackoverflow.com/questions/22737685/
我正在尝试使用boilerpipe来自JRuby。我看过guide从JRuby调用Java,并成功地将它与另一个Java包一起使用,但无法弄清楚为什么同样的东西不能用于boilerpipe。我正在尝试基本上从JRuby中执行与此Java等效的操作:URLurl=newURL("http://www.example.com/some-location/index.html");Stringtext=ArticleExtractor.INSTANCE.getText(url);在JRuby中试过这个:require'java'url=java.net.URL.new("http://www
我只想对我一直在思考的这个问题有其他意见,例如我有classuser_controller和classuserclassUserattr_accessor:name,:usernameendclassUserController//dosomethingaboutanythingaboutusersend问题是我的User类中是否应该有逻辑user=User.newuser.do_something(user1)oritshouldbeuser_controller=UserController.newuser_controller.do_something(user1,user2)我
按照目前的情况,这个问题不适合我们的问答形式。我们希望答案得到事实、引用或专业知识的支持,但这个问题可能会引发辩论、争论、投票或扩展讨论。如果您觉得这个问题可以改进并可能重新打开,visitthehelpcenter指导。关闭10年前。问题1)我想知道rubyonrails是否有功能类似于primefaces的gem。我问的原因是如果您使用primefaces(http://www.primefaces.org/showcase-labs/ui/home.jsf),开发人员无需担心javascript或jquery的东西。据我所知,JSF是一个规范,基于规范的各种可用实现,prim
什么是ruby的rack或python的Java的wsgi?还有一个路由库。 最佳答案 来自Python标准PEP333:Bycontrast,althoughJavahasjustasmanywebapplicationframeworksavailable,Java's"servlet"APImakesitpossibleforapplicationswrittenwithanyJavawebapplicationframeworktoruninanywebserverthatsupportstheservletAPI.ht
这篇文章是继上一篇文章“Observability:从零开始创建Java微服务并监控它(一)”的续篇。在上一篇文章中,我们讲述了如何创建一个Javaweb应用,并使用Filebeat来收集应用所生成的日志。在今天的文章中,我来详述如何收集应用的指标,使用APM来监控应用并监督web服务的在线情况。源码可以在地址 https://github.com/liu-xiao-guo/java_observability 进行下载。摄入指标指标被视为可以随时更改的时间点值。当前请求的数量可以改变任何毫秒。你可能有1000个请求的峰值,然后一切都回到一个请求。这也意味着这些指标可能不准确,你还想提取最小/
HashMap中为什么引入红黑树,而不是AVL树呢1.概述开始学习这个知识点之前我们需要知道,在JDK1.8以及之前,针对HashMap有什么不同。JDK1.7的时候,HashMap的底层实现是数组+链表JDK1.8的时候,HashMap的底层实现是数组+链表+红黑树我们要思考一个问题,为什么要从链表转为红黑树呢。首先先让我们了解下链表有什么不好???2.链表上述的截图其实就是链表的结构,我们来看下链表的增删改查的时间复杂度增:因为链表不是线性结构,所以每次添加的时候,只需要移动一个节点,所以可以理解为复杂度是N(1)删:算法时间复杂度跟增保持一致查:既然是非线性结构,所以查询某一个节点的时候
动漫制作技巧是很多新人想了解的问题,今天小编就来解答与大家分享一下动漫制作流程,为了帮助有兴趣的同学理解,大多数人会选择动漫培训机构,那么今天小编就带大家来看看动漫制作要掌握哪些技巧?一、动漫作品首先完成草图设计和原型制作。设计草图要有目的、有对象、有步骤、要形象、要简单、符合实际。设计图要一致性,以保证制作的顺利进行。二、原型制作是根据设计图纸和制作材料,可以是手绘也可以是3d软件创建。在此步骤中,要注意的问题是色彩和平面布局。三、动漫制作制作完成后,加工成型。完成不同的表现形式后,就要对设计稿进行加工处理,使加工的难易度降低,并得到一些基本准确的概念,以便于后续的大样、准确的尺寸制定。四、
2022/8/4更新支持加入水印水印必须包含透明图像,并且水印图像大小要等于原图像的大小pythonconvert_image_to_video.py-f30-mwatermark.pngim_dirout.mkv2022/6/21更新让命令行参数更加易用新的命令行使用方法pythonconvert_image_to_video.py-f30im_dirout.mkvFFMPEG命令行转换一组JPG图像到视频时,是将这组图像视为MJPG流。我需要转换一组PNG图像到视频,FFMPEG就不认了。pyav内置了ffmpeg库,不需要系统带有ffmpeg工具因此我使用ffmpeg的python包装p
Transformers开始在视频识别领域的“猪突猛进”,各种改进和魔改层出不穷。由此作者将开启VideoTransformer系列的讲解,本篇主要介绍了FBAI团队的TimeSformer,这也是第一篇使用纯Transformer结构在视频识别上的文章。如果觉得有用,就请点赞、收藏、关注!paper:https://arxiv.org/abs/2102.05095code(offical):https://github.com/facebookresearch/TimeSformeraccept:ICML2021author:FacebookAI一、前言Transformers(VIT)在图